using NUnit.Framework;

namespace Lucene.Net.Index
{
    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements. See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License. You may obtain a copy of the License at
     *
     * http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    /// <summary>
    /// Runs the shared <c>BaseMergePolicyTestCase</c> suite against a randomly
    /// configured <c>LogMergePolicy</c>.
    /// </summary>
    public class TestLogMergePolicy : BaseMergePolicyTestCase
    {
        /// <summary>The merge policy under test.</summary>
        protected internal override MergePolicy MergePolicy()
        {
            // Randomized configuration so different seeds exercise different settings.
            return NewLogMergePolicy(Random());
        }

        #region BaseMergePolicyTestCase
        // LUCENENET NOTE: Tests in an abstract base class are not pulled into the correct
        // context in Visual Studio. This fixes that with the minimum amount of code necessary
        // to run them in the correct context without duplicating all of the tests.

        [Test]
        public override void TestForceMergeNotNeeded()
        {
            base.TestForceMergeNotNeeded();
        }

        #endregion
    }
}
using Lucene.Net.Analysis.TokenAttributes;
using System;
using System.Diagnostics;
using Lucene.Net.Documents;

namespace Lucene.Net.Index
{
    using Lucene.Net.Randomized.Generators;
    using NUnit.Framework;
    using System.IO;

    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements. See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License. You may obtain a copy of the License at
     *
     * http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    using Analyzer = Lucene.Net.Analysis.Analyzer;
    using BytesRef = Lucene.Net.Util.BytesRef;
    using Directory = Lucene.Net.Store.Directory;
    using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator;
    using Document = Documents.Document;
    using Field = Field;
    using FieldType = FieldType;
    using FixedBitSet = Lucene.Net.Util.FixedBitSet;
    using IOUtils = Lucene.Net.Util.IOUtils;
    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
    using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
    using TestUtil = Lucene.Net.Util.TestUtil;
    using TextField = TextField;
    using TokenStream = Lucene.Net.Analysis.TokenStream;

    /// <summary>
    /// Indexes two random terms across a large number of documents and verifies
    /// that postings iteration (NextDoc, Advance, positions, payloads) returns
    /// exactly the expected document IDs for each term.
    /// </summary>
    [SuppressCodecs("SimpleText", "Memory", "Direct")]
    [TestFixture]
    public class TestLongPostings : LuceneTestCase
    {
        // Produces a realistic unicode random string that
        // survives MockAnalyzer unchanged: the returned value tokenizes to
        // exactly one token whose text equals the input string.
        // 'other', when non-null, is a value the result must differ from.
        private string GetRandomTerm(string other)
        {
            Analyzer a = new MockAnalyzer(Random());
            while (true)
            {
                string s = TestUtil.RandomRealisticUnicodeString(Random());
                if (other != null && s.Equals(other))
                {
                    continue;
                }
                IOException priorException = null;
                TokenStream ts = a.TokenStream("foo", new StringReader(s));
                try
                {
                    ITermToBytesRefAttribute termAtt = ts.GetAttribute<ITermToBytesRefAttribute>();
                    BytesRef termBytes = termAtt.BytesRef;
                    ts.Reset();

                    int count = 0;
                    bool changed = false;

                    while (ts.IncrementToken())
                    {
                        termAtt.FillBytesRef();
                        if (count == 0 && !termBytes.Utf8ToString().Equals(s))
                        {
                            // The value was changed during analysis. Keep iterating so the
                            // tokenStream is exhausted.
                            changed = true;
                        }
                        count++;
                    }

                    ts.End();
                    // Did we iterate just once and the value was unchanged?
                    if (!changed && count == 1)
                    {
                        return s;
                    }
                }
                catch (IOException e)
                {
                    // Remember the failure; CloseWhileHandlingException suppresses
                    // any secondary close failure, and the loop retries with a new string.
                    priorException = e;
                }
                finally
                {
                    IOUtils.CloseWhileHandlingException(priorException, ts);
                }
            }
        }

        /// <summary>
        /// Full check including positions and payloads against a
        /// DocsAndPositionsEnum, mixing NextDoc and Advance randomly.
        /// </summary>
        [Test]
        public virtual void TestLongPostings_Mem()
        {
            // Don't use TestUtil.getTempDir so that we own the
            // randomness (ie same seed will point to same dir):
            Directory dir = NewFSDirectory(CreateTempDir("longpostings" + "." + Random().NextLong()));

            int NUM_DOCS = AtLeast(2000);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS);
            }

            // Two distinct terms; every document contains exactly one of them.
            string s1 = GetRandomTerm(null);
            string s2 = GetRandomTerm(s1);

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: s1=" + s1 + " s2=" + s2);
                /*
                for(int idx=0;idx<s1.Length();idx++) {
                  System.out.println("  s1 ch=0x" + Integer.toHexString(s1.charAt(idx)));
                }
                for(int idx=0;idx<s2.Length();idx++) {
                  System.out.println("  s2 ch=0x" + Integer.toHexString(s2.charAt(idx)));
                }
                */
            }

            // isS1.Get(doc) == true means doc holds s1, otherwise s2.
            FixedBitSet isS1 = new FixedBitSet(NUM_DOCS);
            for (int idx = 0; idx < NUM_DOCS; idx++)
            {
                if (Random().NextBoolean())
                {
                    isS1.Set(idx);
                }
            }

            IndexReader r;
            IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.CREATE).SetMergePolicy(NewLogMergePolicy());
            iwc.SetRAMBufferSizeMB(16.0 + 16.0 * Random().NextDouble());
            iwc.SetMaxBufferedDocs(-1);
            RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, iwc);

            for (int idx = 0; idx < NUM_DOCS; idx++)
            {
                Document doc = new Document();
                string s = isS1.Get(idx) ? s1 : s2;
                Field f = NewTextField("field", s, Field.Store.NO);
                // Repeat the field 1-4 times so per-doc freq is in [1,4].
                int count = TestUtil.NextInt(Random(), 1, 4);
                for (int ct = 0; ct < count; ct++)
                {
                    doc.Add(f);
                }
                riw.AddDocument(doc);
            }

            r = riw.Reader;
            riw.Dispose();

            /*
            if (VERBOSE) {
              System.out.println("TEST: terms");
              TermEnum termEnum = r.Terms();
              while(termEnum.Next()) {
                System.out.println("  term=" + termEnum.Term() + " len=" + termEnum.Term().Text().Length());
                Assert.IsTrue(termEnum.DocFreq() > 0);
                System.out.println("    s1?=" + (termEnum.Term().Text().equals(s1)) + " s1len=" + s1.Length());
                System.out.println("    s2?=" + (termEnum.Term().Text().equals(s2)) + " s2len=" + s2.Length());
                final String s = termEnum.Term().Text();
                for(int idx=0;idx<s.Length();idx++) {
                  System.out.println("      ch=0x" + Integer.toHexString(s.charAt(idx)));
                }
              }
            }
            */

            Assert.AreEqual(NUM_DOCS, r.NumDocs);
            Assert.IsTrue(r.DocFreq(new Term("field", s1)) > 0);
            Assert.IsTrue(r.DocFreq(new Term("field", s2)) > 0);

            int num = AtLeast(1000);
            for (int iter = 0; iter < num; iter++)
            {
                // Pick one of the two terms to iterate this round.
                string term;
                bool doS1;
                if (Random().NextBoolean())
                {
                    term = s1;
                    doS1 = true;
                }
                else
                {
                    term = s2;
                    doS1 = false;
                }

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter=" + iter + " doS1=" + doS1);
                }

                DocsAndPositionsEnum postings = MultiFields.GetTermPositionsEnum(r, null, "field", new BytesRef(term));

                int docID = -1;
                // Loop terminates via break once the enum is exhausted
                // (relies on NO_MORE_DOCS == int.MaxValue).
                while (docID < DocIdSetIterator.NO_MORE_DOCS)
                {
                    // 1/3 of the time do NextDoc, otherwise Advance.
                    int what = Random().Next(3);
                    if (what == 0)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: docID=" + docID + "; do next()");
                        }
                        // nextDoc
                        // Compute expected: walk forward to the next doc that holds
                        // this term; reaching NUM_DOCS maps to NO_MORE_DOCS.
                        int expected = docID + 1;
                        while (true)
                        {
                            if (expected == NUM_DOCS)
                            {
                                expected = int.MaxValue;
                                break;
                            }
                            else if (isS1.Get(expected) == doS1)
                            {
                                break;
                            }
                            else
                            {
                                expected++;
                            }
                        }
                        docID = postings.NextDoc();
                        if (VERBOSE)
                        {
                            Console.WriteLine("  got docID=" + docID);
                        }
                        Assert.AreEqual(expected, docID);
                        if (docID == DocIdSetIterator.NO_MORE_DOCS)
                        {
                            break;
                        }

                        // Occasionally also verify freq/positions/payloads on this doc.
                        if (Random().Next(6) == 3)
                        {
                            int freq = postings.Freq;
                            Assert.IsTrue(freq >= 1 && freq <= 4);
                            for (int pos = 0; pos < freq; pos++)
                            {
                                // The same single-token value was added 'freq' times,
                                // so positions are 0..freq-1.
                                Assert.AreEqual(pos, postings.NextPosition());
                                if (Random().NextBoolean())
                                {
                                    var dummy = postings.GetPayload();
                                    if (Random().NextBoolean())
                                    {
                                        dummy = postings.GetPayload(); // get it again
                                    }
                                }
                            }
                        }
                    }
                    else
                    {
                        // advance
                        int targetDocID;
                        if (docID == -1)
                        {
                            targetDocID = Random().Next(NUM_DOCS + 1);
                        }
                        else
                        {
                            targetDocID = docID + TestUtil.NextInt(Random(), 1, NUM_DOCS - docID);
                        }
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: docID=" + docID + "; do advance(" + targetDocID + ")");
                        }
                        // Expected is the first doc >= target holding this term.
                        int expected = targetDocID;
                        while (true)
                        {
                            if (expected == NUM_DOCS)
                            {
                                expected = int.MaxValue;
                                break;
                            }
                            else if (isS1.Get(expected) == doS1)
                            {
                                break;
                            }
                            else
                            {
                                expected++;
                            }
                        }

                        docID = postings.Advance(targetDocID);
                        if (VERBOSE)
                        {
                            Console.WriteLine("  got docID=" + docID);
                        }
                        Assert.AreEqual(expected, docID);
                        if (docID == DocIdSetIterator.NO_MORE_DOCS)
                        {
                            break;
                        }

                        if (Random().Next(6) == 3)
                        {
                            int freq = postings.Freq;
                            Assert.IsTrue(freq >= 1 && freq <= 4);
                            for (int pos = 0; pos < freq; pos++)
                            {
                                Assert.AreEqual(pos, postings.NextPosition());
                                if (Random().NextBoolean())
                                {
                                    var dummy = postings.GetPayload();
                                    if (Random().NextBoolean())
                                    {
                                        dummy = postings.GetPayload(); // get it again
                                    }
                                }
                            }
                        }
                    }
                }
            }
            r.Dispose();
            dir.Dispose();
        }

        // a weaker form of testLongPostings, that doesn't check positions
        [Test]
        public virtual void TestLongPostingsNoPositions()
        {
            DoTestLongPostingsNoPositions(IndexOptions.DOCS_ONLY);
            DoTestLongPostingsNoPositions(IndexOptions.DOCS_AND_FREQS);
        }

        /// <summary>
        /// Same scheme as TestLongPostings_Mem but iterates a DocsEnum only:
        /// checks doc IDs (and freq when 'options' includes freqs), no positions.
        /// </summary>
        public virtual void DoTestLongPostingsNoPositions(IndexOptions options)
        {
            // Don't use TestUtil.getTempDir so that we own the
            // randomness (ie same seed will point to same dir):
            Directory dir = NewFSDirectory(CreateTempDir("longpostings" + "." + Random().NextLong()));

            int NUM_DOCS = AtLeast(2000);

            if (VERBOSE)
            {
                Console.WriteLine("TEST: NUM_DOCS=" + NUM_DOCS);
            }

            string s1 = GetRandomTerm(null);
            string s2 = GetRandomTerm(s1);

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: s1=" + s1 + " s2=" + s2);
                /*
                for(int idx=0;idx<s1.Length();idx++) {
                  System.out.println("  s1 ch=0x" + Integer.toHexString(s1.charAt(idx)));
                }
                for(int idx=0;idx<s2.Length();idx++) {
                  System.out.println("  s2 ch=0x" + Integer.toHexString(s2.charAt(idx)));
                }
                */
            }

            FixedBitSet isS1 = new FixedBitSet(NUM_DOCS);
            for (int idx = 0; idx < NUM_DOCS; idx++)
            {
                if (Random().NextBoolean())
                {
                    isS1.Set(idx);
                }
            }

            IndexReader r;
            if (true)
            {
                IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.CREATE).SetMergePolicy(NewLogMergePolicy());
                iwc.SetRAMBufferSizeMB(16.0 + 16.0 * Random().NextDouble());
                iwc.SetMaxBufferedDocs(-1);
                RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, iwc);

                FieldType ft = new FieldType(TextField.TYPE_NOT_STORED);
                ft.IndexOptions = options;
                for (int idx = 0; idx < NUM_DOCS; idx++)
                {
                    Document doc = new Document();
                    string s = isS1.Get(idx) ? s1 : s2;
                    Field f = NewField("field", s, ft);
                    int count = TestUtil.NextInt(Random(), 1, 4);
                    for (int ct = 0; ct < count; ct++)
                    {
                        doc.Add(f);
                    }
                    riw.AddDocument(doc);
                }

                r = riw.Reader;
                riw.Dispose();
            }
            else
            {
                // Dead branch kept from the original test (flip 'if (true)' above
                // to reuse an existing on-disk index while debugging).
#pragma warning disable 162
                r = DirectoryReader.Open(dir);
#pragma warning restore 162
            }

            /*
            if (VERBOSE) {
              System.out.println("TEST: terms");
              TermEnum termEnum = r.Terms();
              while(termEnum.Next()) {
                System.out.println("  term=" + termEnum.Term() + " len=" + termEnum.Term().Text().Length());
                Assert.IsTrue(termEnum.DocFreq() > 0);
                System.out.println("    s1?=" + (termEnum.Term().Text().equals(s1)) + " s1len=" + s1.Length());
                System.out.println("    s2?=" + (termEnum.Term().Text().equals(s2)) + " s2len=" + s2.Length());
                final String s = termEnum.Term().Text();
                for(int idx=0;idx<s.Length();idx++) {
                  System.out.println("      ch=0x" + Integer.toHexString(s.charAt(idx)));
                }
              }
            }
            */

            Assert.AreEqual(NUM_DOCS, r.NumDocs);
            Assert.IsTrue(r.DocFreq(new Term("field", s1)) > 0);
            Assert.IsTrue(r.DocFreq(new Term("field", s2)) > 0);

            int num = AtLeast(1000);
            for (int iter = 0; iter < num; iter++)
            {
                string term;
                bool doS1;
                if (Random().NextBoolean())
                {
                    term = s1;
                    doS1 = true;
                }
                else
                {
                    term = s2;
                    doS1 = false;
                }

                if (VERBOSE)
                {
                    Console.WriteLine("\nTEST: iter=" + iter + " doS1=" + doS1 + " term=" + term);
                }

                DocsEnum docs;
                DocsEnum postings;

                if (options == IndexOptions.DOCS_ONLY)
                {
                    // Freqs were not indexed: iterate docs only.
                    docs = TestUtil.Docs(Random(), r, "field", new BytesRef(term), null, null, DocsEnum.FLAG_NONE);
                    postings = null;
                }
                else
                {
                    docs = postings = TestUtil.Docs(Random(), r, "field", new BytesRef(term), null, null, DocsEnum.FLAG_FREQS);
                    Debug.Assert(postings != null);
                }
                Debug.Assert(docs != null);

                int docID = -1;
                while (docID < DocIdSetIterator.NO_MORE_DOCS)
                {
                    int what = Random().Next(3);
                    if (what == 0)
                    {
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: docID=" + docID + "; do next()");
                        }
                        // nextDoc
                        int expected = docID + 1;
                        while (true)
                        {
                            if (expected == NUM_DOCS)
                            {
                                expected = int.MaxValue;
                                break;
                            }
                            else if (isS1.Get(expected) == doS1)
                            {
                                break;
                            }
                            else
                            {
                                expected++;
                            }
                        }
                        docID = docs.NextDoc();
                        if (VERBOSE)
                        {
                            Console.WriteLine("  got docID=" + docID);
                        }
                        Assert.AreEqual(expected, docID);
                        if (docID == DocIdSetIterator.NO_MORE_DOCS)
                        {
                            break;
                        }

                        if (Random().Next(6) == 3 && postings != null)
                        {
                            int freq = postings.Freq;
                            Assert.IsTrue(freq >= 1 && freq <= 4);
                        }
                    }
                    else
                    {
                        // advance
                        int targetDocID;
                        if (docID == -1)
                        {
                            targetDocID = Random().Next(NUM_DOCS + 1);
                        }
                        else
                        {
                            targetDocID = docID + TestUtil.NextInt(Random(), 1, NUM_DOCS - docID);
                        }
                        if (VERBOSE)
                        {
                            Console.WriteLine("TEST: docID=" + docID + "; do advance(" + targetDocID + ")");
                        }
                        int expected = targetDocID;
                        while (true)
                        {
                            if (expected == NUM_DOCS)
                            {
                                expected = int.MaxValue;
                                break;
                            }
                            else if (isS1.Get(expected) == doS1)
                            {
                                break;
                            }
                            else
                            {
                                expected++;
                            }
                        }

                        docID = docs.Advance(targetDocID);
                        if (VERBOSE)
                        {
                            Console.WriteLine("  got docID=" + docID);
                        }
                        Assert.AreEqual(expected, docID);
                        if (docID == DocIdSetIterator.NO_MORE_DOCS)
                        {
                            break;
                        }

                        if (Random().Next(6) == 3 && postings != null)
                        {
                            int freq = postings.Freq;
                            Assert.IsTrue(freq >= 1 && freq <= 4, "got invalid freq=" + freq);
                        }
                    }
                }
            }
            r.Dispose();
            dir.Dispose();
        }
    }
}
using System;
using
System.Collections.Generic;
using System.Linq;
using Lucene.Net.Documents;

namespace Lucene.Net.Index
{
    using Lucene.Net.Support;
    using NUnit.Framework;
    using BytesRef = Lucene.Net.Util.BytesRef;
    using Directory = Lucene.Net.Store.Directory;
    using Document = Documents.Document;
    using Field = Field;
    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;

    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements. See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License. You may obtain a copy of the License at
     *
     * http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
    using MockTokenizer = Lucene.Net.Analysis.MockTokenizer;
    using TestUtil = Lucene.Net.Util.TestUtil;
    using TFIDFSimilarity = Lucene.Net.Search.Similarities.TFIDFSimilarity;

    /// <summary>
    /// Tests the maxTermFrequency statistic in FieldInvertState by encoding it
    /// directly into the norm via a custom similarity and reading it back.
    /// </summary>
    [TestFixture]
    public class TestMaxTermFrequency : LuceneTestCase
    {
        internal Directory Dir;
        internal IndexReader Reader;
        /* expected maxTermFrequency values for our documents */
        internal List<int?> Expected = new List<int?>();

        [SetUp]
        public override void SetUp()
        {
            base.SetUp();
            Dir = NewDirectory();

            // Letter tokenizer + the norm-encoding similarity under test.
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random(), MockTokenizer.SIMPLE, true)).SetMergePolicy(NewLogMergePolicy());
            conf.SetSimilarity(new TestSimilarity(this));

            RandomIndexWriter iw = new RandomIndexWriter(Random(), Dir, conf);
            Document document = new Document();
            Field foo = NewTextField("foo", "", Field.Store.NO);
            document.Add(foo);
            // Index 100 docs; AddValue records each doc's expected max frequency.
            for (int i = 0; i < 100; i++)
            {
                foo.SetStringValue(AddValue());
                iw.AddDocument(document);
            }
            Reader = iw.Reader;
            iw.Dispose();
        }

        [TearDown]
        public override void TearDown()
        {
            Reader.Dispose();
            Dir.Dispose();
            base.TearDown();
        }

        [Test]
        public virtual void Test()
        {
            // Each norm byte should equal the recorded max term frequency.
            NumericDocValues fooNorms = MultiDocValues.GetNormValues(Reader, "foo");
            for (int doc = 0; doc < Reader.MaxDoc; doc++)
            {
                Assert.AreEqual((int)Expected[doc], fooNorms.Get(doc) & 0xff);
            }
        }

        /// <summary>
        /// Makes a bunch of single-char tokens (the max freq will at most be 255).
        /// shuffles them around, and returns the whole list with Arrays.toString().
        /// this works fine because we use lettertokenizer.
        /// puts the max-frequency term into expected, to be checked against the norm.
        /// </summary>
        private string AddValue()
        {
            IList<string> tokens = new List<string>();
            int ceiling = TestUtil.NextInt(Random(), 0, 255);
            int maxFreq = 0;
            for (char letter = 'a'; letter <= 'z'; letter++)
            {
                int freq = TestUtil.NextInt(Random(), 0, ceiling);
                for (int i = 0; i < freq; i++)
                {
                    tokens.Add(char.ToString(letter));
                }
                maxFreq = Math.Max(maxFreq, freq);
            }
            Expected.Add(maxFreq);

            Collections.Shuffle(tokens);
            return Arrays.ToString(tokens.ToArray());
        }

        /// <summary>
        /// Simple similarity that encodes maxTermFrequency directly as a byte;
        /// every other factor is zeroed out so only LengthNorm matters.
        /// </summary>
        internal class TestSimilarity : TFIDFSimilarity
        {
            private readonly TestMaxTermFrequency OuterInstance;

            public TestSimilarity(TestMaxTermFrequency outerInstance)
            {
                this.OuterInstance = outerInstance;
            }

            public override float LengthNorm(FieldInvertState state) => state.MaxTermFrequency;

            public override long EncodeNormValue(float f) => (sbyte)f;

            public override float DecodeNormValue(long norm) => norm;

            public override float Coord(int overlap, int maxOverlap) => 0;

            public override float QueryNorm(float sumOfSquaredWeights) => 0;

            public override float Tf(float freq) => 0;

            public override float Idf(long docFreq, long numDocs) => 0;

            public override float SloppyFreq(int distance) => 0;

            public override float ScorePayload(int doc, int start, int end, BytesRef payload) => 0;
        }
    }
}
using System;
using System.Collections.Generic;
using Lucene.Net.Documents;

namespace Lucene.Net.Index
{
    using Lucene.Net.Randomized.Generators;
    using NUnit.Framework;
    using Codec = Lucene.Net.Codecs.Codec;
    using Directory = Lucene.Net.Store.Directory;
    using Document = Documents.Document;
    using Field = Field;
    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;

    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements. See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License. You may obtain a copy of the License at
     *
     * http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer;
    using TestUtil = Lucene.Net.Util.TestUtil;

    /// <summary>
    /// Writes many small segments, periodically re-opening the writer with a
    /// fresh (randomized) config so adjacent segments may use different codecs,
    /// then deletes half the docs and checks NumDocs stays consistent.
    /// </summary>
    [SuppressCodecs("Lucene3x")]
    [TestFixture]
    public class TestMixedCodecs : LuceneTestCase
    {
        [Test]
        public virtual void Test()
        {
            int NUM_DOCS = AtLeast(1000);

            Directory dir = NewDirectory();
            RandomIndexWriter w = null;

            int docsLeftInthisSegment = 0;

            int docUpto = 0;
            while (docUpto < NUM_DOCS)
            {
                if (VERBOSE)
                {
                    Console.WriteLine("TEST: " + docUpto + " of " + NUM_DOCS);
                }
                if (docsLeftInthisSegment == 0)
                {
                    IndexWriterConfig iwc = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
                    if (Random().NextBoolean())
                    {
                        // NOTE(review): upstream Lucene aggressively mixed in SimpleText
                        // here (it has different impls for all codec formats), but this
                        // port pins Lucene46 instead — presumably a LUCENENET substitution;
                        // confirm against the Java original before relying on the comment.
                        iwc.SetCodec(Codec.ForName("Lucene46"));
                    }
                    // Close the previous writer (if any) so the next batch of docs
                    // is written under the newly chosen config/codec.
                    if (w != null)
                    {
                        w.Dispose();
                    }
                    w = new RandomIndexWriter(Random(), dir, iwc);
                    docsLeftInthisSegment = TestUtil.NextInt(Random(), 10, 100);
                }
                Document doc = new Document();
                doc.Add(NewStringField("id", Convert.ToString(docUpto), Field.Store.YES));
                w.AddDocument(doc);
                docUpto++;
                docsLeftInthisSegment--;
            }

            if (VERBOSE)
            {
                Console.WriteLine("\nTEST: now delete...");
            }

            // Random delete half the docs:
            HashSet<int?> deleted = new HashSet<int?>();
            while (deleted.Count < NUM_DOCS / 2)
            {
                int? toDelete = Random().Next(NUM_DOCS);
                if (!deleted.Contains(toDelete))
                {
                    deleted.Add(toDelete);
                    w.DeleteDocuments(new Term("id", Convert.ToString(toDelete)));
                    // Occasionally open an NRT reader to verify the live doc count.
                    if (Random().Next(17) == 6)
                    {
                        IndexReader r = w.Reader;
                        Assert.AreEqual(NUM_DOCS - deleted.Count, r.NumDocs);
                        r.Dispose();
                    }
                }
            }

            w.Dispose();
            dir.Dispose();
        }
    }
}
using System;
using System.Threading;
using System.Collections.Generic;
using Lucene.Net.Documents;

namespace Lucene.Net.Index
{
    using Lucene.Net.Randomized.Generators;
    using Lucene.Net.Support;
    using NUnit.Framework;
    using System.IO;
    using BinaryDocValuesField = BinaryDocValuesField;
    using IBits = Lucene.Net.Util.IBits;
    using BytesRef = Lucene.Net.Util.BytesRef;
    using Directory = Lucene.Net.Store.Directory;
    using Document = Documents.Document;
    using IOUtils = Lucene.Net.Util.IOUtils;
    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer; + using NumericDocValuesField = NumericDocValuesField; + using Store = Field.Store; + using StringField = StringField; + using TestUtil = Lucene.Net.Util.TestUtil; + using Attributes; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + [SuppressCodecs("Appending", "Lucene3x", "Lucene40", "Lucene41", "Lucene42", "Lucene45")] + [TestFixture] + public class TestMixedDocValuesUpdates : LuceneTestCase + { + [Test] + public virtual void TestManyReopensAndFields() + { + Directory dir = NewDirectory(); + Random random = Random(); + IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)); + LogMergePolicy lmp = NewLogMergePolicy(); + lmp.MergeFactor = 3; // merge often + conf.SetMergePolicy(lmp); + IndexWriter writer = new IndexWriter(dir, conf); + + bool isNRT = random.NextBoolean(); + DirectoryReader reader; + if (isNRT) + { + reader = DirectoryReader.Open(writer, true); + } + else + { + writer.Commit(); + reader = DirectoryReader.Open(dir); + } + + int numFields = random.Next(4) + 3; // 3-7 + int numNDVFields = random.Next(numFields / 2) + 1; // 1-3 + long[] fieldValues = new long[numFields]; + bool[] fieldHasValue = new bool[numFields]; + Arrays.Fill(fieldHasValue, true); + for (int i = 0; i < fieldValues.Length; i++) + { + fieldValues[i] = 1; + } + + int numRounds = AtLeast(15); + int docID = 0; + for (int i = 0; i < numRounds; i++) + { + int numDocs = AtLeast(5); + // System.out.println("[" + Thread.currentThread().getName() + "]: round=" + i + ", numDocs=" + numDocs); + for (int j = 0; j < numDocs; j++) + { + Document doc = new Document(); + doc.Add(new StringField("id", "doc-" + docID, Store.NO)); + doc.Add(new StringField("key", "all", Store.NO)); // update key + // add all fields with their current value + for (int f = 0; f < fieldValues.Length; f++) + { + if (f < numNDVFields) + { + doc.Add(new NumericDocValuesField("f" + f, fieldValues[f])); + } + else + { + doc.Add(new BinaryDocValuesField("f" + f, TestBinaryDocValuesUpdates.ToBytes(fieldValues[f]))); + } + } + writer.AddDocument(doc); + ++docID; + } + + // if field's value was unset before, unset it from all new added documents too + for (int field = 0; field < fieldHasValue.Length; field++) + { 
+ if (!fieldHasValue[field]) + { + if (field < numNDVFields) + { + writer.UpdateNumericDocValue(new Term("key", "all"), "f" + field, null); + } + else + { + writer.UpdateBinaryDocValue(new Term("key", "all"), "f" + field, null); + } + } + } + + int fieldIdx = random.Next(fieldValues.Length); + string updateField = "f" + fieldIdx; + if (random.NextBoolean()) + { + // System.out.println("[" + Thread.currentThread().getName() + "]: unset field '" + updateField + "'"); + fieldHasValue[fieldIdx] = false; + if (fieldIdx < numNDVFields) + { + writer.UpdateNumericDocValue(new Term("key", "all"), updateField, null); + } + else + { + writer.UpdateBinaryDocValue(new Term("key", "all"), updateField, null); + } + } + else + { + fieldHasValue[fieldIdx] = true; + if (fieldIdx < numNDVFields) + { + writer.UpdateNumericDocValue(new Term("key", "all"), updateField, ++fieldValues[fieldIdx]); + } + else + { + writer.UpdateBinaryDocValue(new Term("key", "all"), updateField, TestBinaryDocValuesUpdates.ToBytes(++fieldValues[fieldIdx])); + } + // System.out.println("[" + Thread.currentThread().getName() + "]: updated field '" + updateField + "' to value " + fieldValues[fieldIdx]); + } + + if (random.NextDouble() < 0.2) + { + int deleteDoc = random.Next(docID); // might also delete an already deleted document, ok! 
writer.DeleteDocuments(new Term("id", "doc-" + deleteDoc));
                    // System.out.println("[" + Thread.currentThread().getName() + "]: deleted document: doc-" + deleteDoc);
                }

                // verify reader
                if (!isNRT)
                {
                    writer.Commit();
                }

                // System.out.println("[" + Thread.currentThread().getName() + "]: reopen reader: " + reader);
                DirectoryReader newReader = DirectoryReader.OpenIfChanged(reader);
                Assert.IsNotNull(newReader);
                reader.Dispose();
                reader = newReader;
                // System.out.println("[" + Thread.currentThread().getName() + "]: reopened reader: " + reader);
                Assert.IsTrue(reader.NumDocs > 0); // we delete at most one document per round
                BytesRef scratch = new BytesRef();
                foreach (AtomicReaderContext context in reader.Leaves)
                {
                    AtomicReader r = context.AtomicReader;
                    // System.out.println(((SegmentReader) r).getSegmentName());
                    IBits liveDocs = r.LiveDocs;
                    for (int field = 0; field < fieldValues.Length; field++)
                    {
                        string f = "f" + field;
                        BinaryDocValues bdv = r.GetBinaryDocValues(f);
                        NumericDocValues ndv = r.GetNumericDocValues(f);
                        IBits docsWithField = r.GetDocsWithField(f);
                        if (field < numNDVFields)
                        {
                            Assert.IsNotNull(ndv);
                            Assert.IsNull(bdv);
                        }
                        else
                        {
                            Assert.IsNull(ndv);
                            Assert.IsNotNull(bdv);
                        }
                        int maxDoc = r.MaxDoc;
                        for (int doc = 0; doc < maxDoc; doc++)
                        {
                            if (liveDocs == null || liveDocs.Get(doc))
                            {
                                // System.out.println("doc=" + (doc + context.DocBase) + " f='" + f + "' value=" + getValue(bdv, doc, scratch));
                                if (fieldHasValue[field])
                                {
                                    Assert.IsTrue(docsWithField.Get(doc));
                                    if (field < numNDVFields)
                                    {
                                        Assert.AreEqual(fieldValues[field], ndv.Get(doc), "invalid value for doc=" + doc + ", field=" + f + ", reader=" + r);
                                    }
                                    else
                                    {
                                        Assert.AreEqual(fieldValues[field], TestBinaryDocValuesUpdates.GetValue(bdv, doc, scratch), "invalid value for doc=" + doc + ", field=" + f + ", reader=" + r);
                                    }
                                }
                                else
                                {
                                    Assert.IsFalse(docsWithField.Get(doc));
                                }
                            }
                        }
                    }
                }
                // System.out.println();
            }

            IOUtils.Close(writer, reader, dir);
        }

        /// <summary>
        /// Stress test: several threads concurrently apply binary + numeric doc-values
        /// updates (each thread owns one field pair f{i}/cf{i}), interleaved with
        /// deletes, commits, and NRT reopens; afterwards verifies cf == f * 2 for all
        /// live docs.
        /// </summary>
        [Test]
        public virtual void TestStressMultiThreading()
        {
            Directory dir = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            IndexWriter writer = new IndexWriter(dir, conf);

            // create index
            int numThreads = TestUtil.NextInt(Random(), 3, 6);
            int numDocs = AtLeast(2000);
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new StringField("id", "doc" + i, Store.NO));
                double group = Random().NextDouble();
                string g;
                if (group < 0.1)
                {
                    g = "g0";
                }
                else if (group < 0.5)
                {
                    g = "g1";
                }
                else if (group < 0.8)
                {
                    g = "g2";
                }
                else
                {
                    g = "g3";
                }
                doc.Add(new StringField("updKey", g, Store.NO));
                for (int j = 0; j < numThreads; j++)
                {
                    long value = Random().Next();
                    doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(value)));
                    doc.Add(new NumericDocValuesField("cf" + j, value * 2)); // control, always updated to f * 2
                }
                writer.AddDocument(doc);
            }

            CountdownEvent done = new CountdownEvent(numThreads);
            AtomicInt32 numUpdates = new AtomicInt32(AtLeast(100));

            // same thread updates a field as well as reopens
            ThreadClass[] threads = new ThreadClass[numThreads];
            for (int i = 0; i < threads.Length; i++)
            {
                string f = "f" + i;
                string cf = "cf" + i;
                threads[i] = new ThreadAnonymousInnerClassHelper(this, "UpdateThread-" + i, writer, numDocs, done, numUpdates, f, cf);
            }

            foreach (ThreadClass t in threads)
            {
                t.Start();
            }
            done.Wait();
            writer.Dispose();

            // verify: for every live doc, the control field must equal the updated field * 2,
            // and both fields must agree on which docs carry a value
            DirectoryReader reader = DirectoryReader.Open(dir);
            BytesRef scratch = new BytesRef();
            foreach (AtomicReaderContext context in reader.Leaves)
            {
                AtomicReader r = context.AtomicReader;
                for (int i = 0; i < numThreads; i++)
                {
                    BinaryDocValues bdv = r.GetBinaryDocValues("f" + i);
                    NumericDocValues control = r.GetNumericDocValues("cf" + i);
                    IBits docsWithBdv = r.GetDocsWithField("f" + i);
                    IBits docsWithControl = r.GetDocsWithField("cf" + i);
                    IBits liveDocs = r.LiveDocs;
                    for (int j = 0; j < r.MaxDoc; j++)
                    {
                        if (liveDocs == null || liveDocs.Get(j))
                        {
                            Assert.AreEqual(docsWithBdv.Get(j), docsWithControl.Get(j));
                            if (docsWithBdv.Get(j))
                            {
                                long ctrlValue = control.Get(j);
                                long bdvValue = TestBinaryDocValuesUpdates.GetValue(bdv, j, scratch) * 2;
                                // if (ctrlValue != bdvValue) {
                                // System.out.println("seg=" + r + ", f=f" + i + ", doc=" + j + ", group=" + r.Document(j).Get("updKey") + ", ctrlValue=" + ctrlValue + ", bdvBytes=" + scratch);
                                // }
                                Assert.AreEqual(ctrlValue, bdvValue);
                            }
                        }
                    }
                }
            }
            reader.Dispose();

            dir.Dispose();
        }

        /// <summary>
        /// Worker thread for <see cref="TestStressMultiThreading"/>: repeatedly picks a
        /// random update group, updates (or unsets) its f/cf field pair, randomly
        /// deletes / commits / reopens an NRT reader, and signals <c>done</c> on exit.
        /// </summary>
        private class ThreadAnonymousInnerClassHelper : ThreadClass
        {
            private readonly TestMixedDocValuesUpdates OuterInstance;

            private IndexWriter Writer;
            private int NumDocs;
            private CountdownEvent Done;
            private AtomicInt32 NumUpdates;
            private string f;
            private string Cf;

            public ThreadAnonymousInnerClassHelper(TestMixedDocValuesUpdates outerInstance, string str, IndexWriter writer, int numDocs, CountdownEvent done, AtomicInt32 numUpdates, string f, string cf)
                : base(str)
            {
                this.OuterInstance = outerInstance;
                this.Writer = writer;
                this.NumDocs = numDocs;
                this.Done = done;
                this.NumUpdates = numUpdates;
                this.f = f;
                this.Cf = cf;
            }

            public override void Run()
            {
                DirectoryReader reader = null;
                bool success = false;
                try
                {
                    Random random = Random();
                    while (NumUpdates.GetAndDecrement() > 0)
                    {
                        double group = random.NextDouble();
                        Term t;
                        if (group < 0.1)
                        {
                            t = new Term("updKey", "g0");
                        }
                        else if (group < 0.5)
                        {
                            t = new Term("updKey", "g1");
                        }
                        else if (group < 0.8)
                        {
                            t = new Term("updKey", "g2");
                        }
                        else
                        {
                            t = new Term("updKey", "g3");
                        }
                        // System.out.println("[" + Thread.currentThread().getName() + "] numUpdates=" + numUpdates + " updateTerm=" + t);
                        if (random.NextBoolean()) // sometimes unset a value
                        {
                            // System.err.println("[" + Thread.currentThread().getName() + "] t=" + t + ", f=" + f + ", updValue=UNSET");
                            Writer.UpdateBinaryDocValue(t, f, null);
                            Writer.UpdateNumericDocValue(t, Cf, null);
                        }
                        else
                        {
                            long updValue = random.Next();
                            // System.err.println("[" + Thread.currentThread().getName() + "] t=" + t + ", f=" + f + ", updValue=" + updValue);
                            Writer.UpdateBinaryDocValue(t, f, TestBinaryDocValuesUpdates.ToBytes(updValue));
                            Writer.UpdateNumericDocValue(t, Cf, updValue * 2);
                        }

                        if (random.NextDouble() < 0.2)
                        {
                            // delete a random document
                            int doc = random.Next(NumDocs);
                            // System.out.println("[" + Thread.currentThread().getName() + "] deleteDoc=doc" + doc);
                            Writer.DeleteDocuments(new Term("id", "doc" + doc));
                        }

                        if (random.NextDouble() < 0.05) // commit every 20 updates on average
                        {
                            // System.out.println("[" + Thread.currentThread().getName() + "] commit");
                            Writer.Commit();
                        }

                        if (random.NextDouble() < 0.1) // reopen NRT reader (apply updates), on average once every 10 updates
                        {
                            if (reader == null)
                            {
                                // System.out.println("[" + Thread.currentThread().getName() + "] open NRT");
                                reader = DirectoryReader.Open(Writer, true);
                            }
                            else
                            {
                                // System.out.println("[" + Thread.currentThread().getName() + "] reopen NRT");
                                DirectoryReader r2 = DirectoryReader.OpenIfChanged(reader, Writer, true);
                                if (r2 != null)
                                {
                                    reader.Dispose();
                                    reader = r2;
                                }
                            }
                        }
                    }
                    // System.out.println("[" + Thread.currentThread().getName() + "] DONE");
                    success = true;
                }
                catch (IOException e)
                {
                    throw new Exception(e.Message, e);
                }
                finally
                {
                    if (reader != null)
                    {
                        try
                        {
                            reader.Dispose();
                        }
                        catch (IOException e)
                        {
                            if (success) // suppress this exception only if there was another exception
                            {
                                throw new Exception(e.Message, e);
                            }
                        }
                    }
                    Done.Signal();
                }
            }
        }

        [Test]
        public virtual void TestUpdateDifferentDocsInDifferentGens()
        {
            // update same document multiple times across generations
            Directory dir = NewDirectory();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()));
            conf.SetMaxBufferedDocs(4);
            IndexWriter writer = new IndexWriter(dir, conf);
            int numDocs = AtLeast(10);
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                doc.Add(new StringField("id", "doc" + i, Store.NO));
                long value = Random().Next();
                doc.Add(new BinaryDocValuesField("f", TestBinaryDocValuesUpdates.ToBytes(value)));
                doc.Add(new NumericDocValuesField("cf", value * 2));
                writer.AddDocument(doc);
            }

            int numGens = AtLeast(5);
            BytesRef scratch = new BytesRef();
            for (int i = 0; i < numGens; i++)
            {
                int doc = Random().Next(numDocs);
                Term t = new Term("id", "doc" + doc);
                long value = Random().NextLong();
                writer.UpdateBinaryDocValue(t, "f", TestBinaryDocValuesUpdates.ToBytes(value));
                writer.UpdateNumericDocValue(t, "cf", value * 2);
                DirectoryReader reader = DirectoryReader.Open(writer, true);
                foreach (AtomicReaderContext context in reader.Leaves)
                {
                    AtomicReader r = context.AtomicReader;
                    BinaryDocValues fbdv = r.GetBinaryDocValues("f");
                    NumericDocValues cfndv = r.GetNumericDocValues("cf");
                    for (int j = 0; j < r.MaxDoc; j++)
                    {
                        Assert.AreEqual(cfndv.Get(j), TestBinaryDocValuesUpdates.GetValue(fbdv, j, scratch) * 2);
                    }
                }
                reader.Dispose();
            }
            writer.Dispose();
            dir.Dispose();
        }

#if !NETSTANDARD
        // LUCENENET: There is no Timeout on NUnit for .NET Core.
        [Timeout(80000)]
#endif
        [Test, HasTimeout]
        public virtual void TestTonsOfUpdates()
        {
            // LUCENE-5248: make sure that when there are many updates, we don't use too much RAM
            Directory dir = NewDirectory();
            Random random = Random();
            IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
            conf.SetRAMBufferSizeMB(IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB);
            conf.SetMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // don't flush by doc
            IndexWriter writer = new IndexWriter(dir, conf);

            // test data: lots of documents (few 10Ks) and lots of update terms (few hundreds)
            int numDocs = AtLeast(20000);
            int numBinaryFields = AtLeast(5);
            int numTerms = TestUtil.NextInt(random, 10, 100); // terms should affect many docs
            HashSet<string> updateTerms = new HashSet<string>();
            while (updateTerms.Count < numTerms)
            {
                updateTerms.Add(TestUtil.RandomSimpleString(random));
            }

            // System.out.println("numDocs=" + numDocs + " numBinaryFields=" + numBinaryFields + " numTerms=" + numTerms);

            // build a large index with many BDV fields and update terms
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                int numUpdateTerms = TestUtil.NextInt(random, 1, numTerms / 10);
                for (int j = 0; j < numUpdateTerms; j++)
                {
                    doc.Add(new StringField("upd", RandomInts.RandomFrom(random, updateTerms), Store.NO));
                }
                for (int j = 0; j < numBinaryFields; j++)
                {
                    long val = random.Next();
                    doc.Add(new BinaryDocValuesField("f" + j, TestBinaryDocValuesUpdates.ToBytes(val)));
                    doc.Add(new NumericDocValuesField("cf" + j, val * 2));
                }
                writer.AddDocument(doc);
            }

            writer.Commit(); // commit so there's something to apply to

            // set to flush every 2048 bytes (approximately every 12 updates), so we get
            // many flushes during binary updates
            writer.Config.SetRAMBufferSizeMB(2048.0 / 1024 / 1024);
            int numUpdates = AtLeast(100);
            // System.out.println("numUpdates=" + numUpdates);
            for (int i = 0; i < numUpdates; i++)
            {
                int field = random.Next(numBinaryFields);
                Term updateTerm = new Term("upd", RandomInts.RandomFrom(random, updateTerms));
                long value = random.Next();
                writer.UpdateBinaryDocValue(updateTerm, "f" + field, TestBinaryDocValuesUpdates.ToBytes(value));
                writer.UpdateNumericDocValue(updateTerm, "cf" + field, value * 2);
            }

            writer.Dispose();

            DirectoryReader reader = DirectoryReader.Open(dir);
            BytesRef scratch = new BytesRef();
            foreach (AtomicReaderContext context in reader.Leaves)
            {
                for (int i = 0; i < numBinaryFields; i++)
                {
                    AtomicReader r = context.AtomicReader;
                    BinaryDocValues f = r.GetBinaryDocValues("f" + i);
                    NumericDocValues cf = r.GetNumericDocValues("cf" + i);
                    for (int j = 0; j < r.MaxDoc; j++)
                    {
                        Assert.AreEqual(cf.Get(j), TestBinaryDocValuesUpdates.GetValue(f, j, scratch) * 2, "reader=" + r + ", field=f" + i + ", doc=" + j);
                    }
                }
            }
            reader.Dispose();

            dir.Dispose();
        }
    }
}
\ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestMultiDocValues.cs
----------------------------------------------------------------------
diff --git a/src/Lucene.Net.Tests/Index/TestMultiDocValues.cs b/src/Lucene.Net.Tests/Index/TestMultiDocValues.cs
new file mode 100644
index 0000000..a4a4b84
--- /dev/null
+++ b/src/Lucene.Net.Tests/Index/TestMultiDocValues.cs
@@ -0,0 +1,439 @@
using Lucene.Net.Documents;
using Lucene.Net.Randomized.Generators;
using System.Collections.Generic;

namespace Lucene.Net.Index
{
    using NUnit.Framework;

    /*
     * Licensed to the Apache Software Foundation (ASF) under one or more
     * contributor license agreements. See the NOTICE file distributed with
     * this work for additional information regarding copyright ownership.
     * The ASF licenses this file to You under the Apache License, Version 2.0
     * (the "License"); you may not use this file except in compliance with
     * the License.
* You may obtain a copy of the License at
     *
     * http://www.apache.org/licenses/LICENSE-2.0
     *
     * Unless required by applicable law or agreed to in writing, software
     * distributed under the License is distributed on an "AS IS" BASIS,
     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     * See the License for the specific language governing permissions and
     * limitations under the License.
     */

    using BinaryDocValuesField = BinaryDocValuesField;
    using IBits = Lucene.Net.Util.IBits;
    using BytesRef = Lucene.Net.Util.BytesRef;
    using Directory = Lucene.Net.Store.Directory;
    using Document = Documents.Document;
    using Field = Field;
    using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
    using NumericDocValuesField = NumericDocValuesField;
    using SortedDocValuesField = SortedDocValuesField;
    using SortedSetDocValuesField = SortedSetDocValuesField;
    using TestUtil = Lucene.Net.Util.TestUtil;

    /// <summary>
    /// Tests MultiDocValues versus ordinary segment merging: every test builds a
    /// multi-segment index, force-merges a second reader down to one segment, and
    /// asserts that the MultiDocValues view over the unmerged reader agrees with
    /// the doc values of the merged segment.
    /// </summary>
    [SuppressCodecs("Lucene3x")]
    [TestFixture]
    public class TestMultiDocValues : LuceneTestCase
    {
        [Test]
        public virtual void TestNumerics()
        {
            Directory dir = NewDirectory();
            Document doc = new Document();
            Field field = new NumericDocValuesField("numbers", 0);
            doc.Add(field);

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);
            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            int numDocs = AtLeast(500);
            for (int i = 0; i < numDocs; i++)
            {
                field.SetInt64Value(Random().NextLong());
                iw.AddDocument(doc);
                if (Random().Next(17) == 0)
                {
                    iw.Commit();
                }
            }
            DirectoryReader ir = iw.Reader;
            iw.ForceMerge(1);
            DirectoryReader ir2 = iw.Reader;
            AtomicReader merged = GetOnlySegmentReader(ir2);
            iw.Dispose();

            NumericDocValues multi = MultiDocValues.GetNumericValues(ir, "numbers");
            NumericDocValues single = merged.GetNumericDocValues("numbers");
            for (int i = 0; i < numDocs; i++)
            {
                Assert.AreEqual(single.Get(i), multi.Get(i));
            }
            ir.Dispose();
            ir2.Dispose();
            dir.Dispose();
        }

        [Test]
        public virtual void TestBinary()
        {
            Directory dir = NewDirectory();
            Document doc = new Document();
            BytesRef @ref = new BytesRef();
            Field field = new BinaryDocValuesField("bytes", @ref);
            doc.Add(field);

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);
            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            int numDocs = AtLeast(500);
            for (int i = 0; i < numDocs; i++)
            {
                @ref.CopyChars(TestUtil.RandomUnicodeString(Random()));
                iw.AddDocument(doc);
                if (Random().Next(17) == 0)
                {
                    iw.Commit();
                }
            }
            DirectoryReader ir = iw.Reader;
            iw.ForceMerge(1);
            DirectoryReader ir2 = iw.Reader;
            AtomicReader merged = GetOnlySegmentReader(ir2);
            iw.Dispose();

            BinaryDocValues multi = MultiDocValues.GetBinaryValues(ir, "bytes");
            BinaryDocValues single = merged.GetBinaryDocValues("bytes");
            BytesRef actual = new BytesRef();
            BytesRef expected = new BytesRef();
            for (int i = 0; i < numDocs; i++)
            {
                single.Get(i, expected);
                multi.Get(i, actual);
                Assert.AreEqual(expected, actual);
            }
            ir.Dispose();
            ir2.Dispose();
            dir.Dispose();
        }

        [Test]
        public virtual void TestSorted()
        {
            Directory dir = NewDirectory();
            Document doc = new Document();
            BytesRef @ref = new BytesRef();
            Field field = new SortedDocValuesField("bytes", @ref);
            doc.Add(field);

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);
            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            int numDocs = AtLeast(500);
            for (int i = 0; i < numDocs; i++)
            {
                @ref.CopyChars(TestUtil.RandomUnicodeString(Random()));
                if (DefaultCodecSupportsDocsWithField() && Random().Next(7) == 0)
                {
                    // occasionally insert a doc without the field, so some ords are missing
                    iw.AddDocument(new Document());
                }
                iw.AddDocument(doc);
                if (Random().Next(17) == 0)
                {
                    iw.Commit();
                }
            }
            DirectoryReader ir = iw.Reader;
            iw.ForceMerge(1);
            DirectoryReader ir2 = iw.Reader;
            AtomicReader merged = GetOnlySegmentReader(ir2);
            iw.Dispose();

            SortedDocValues multi = MultiDocValues.GetSortedValues(ir, "bytes");
            SortedDocValues single = merged.GetSortedDocValues("bytes");
            Assert.AreEqual(single.ValueCount, multi.ValueCount);
            BytesRef actual = new BytesRef();
            BytesRef expected = new BytesRef();
            for (int i = 0; i < numDocs; i++)
            {
                // check ord
                Assert.AreEqual(single.GetOrd(i), multi.GetOrd(i));
                // check value
                single.Get(i, expected);
                multi.Get(i, actual);
                Assert.AreEqual(expected, actual);
            }
            ir.Dispose();
            ir2.Dispose();
            dir.Dispose();
        }

        // tries to make more dups than testSorted
        [Test]
        public virtual void TestSortedWithLotsOfDups()
        {
            Directory dir = NewDirectory();
            Document doc = new Document();
            BytesRef @ref = new BytesRef();
            Field field = new SortedDocValuesField("bytes", @ref);
            doc.Add(field);

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);
            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            int numDocs = AtLeast(500);
            for (int i = 0; i < numDocs; i++)
            {
                // length <= 2 from a small alphabet keeps the value space tiny -> many dups
                @ref.CopyChars(TestUtil.RandomSimpleString(Random(), 2));
                iw.AddDocument(doc);
                if (Random().Next(17) == 0)
                {
                    iw.Commit();
                }
            }
            DirectoryReader ir = iw.Reader;
            iw.ForceMerge(1);
            DirectoryReader ir2 = iw.Reader;
            AtomicReader merged = GetOnlySegmentReader(ir2);
            iw.Dispose();

            SortedDocValues multi = MultiDocValues.GetSortedValues(ir, "bytes");
            SortedDocValues single = merged.GetSortedDocValues("bytes");
            Assert.AreEqual(single.ValueCount, multi.ValueCount);
            BytesRef actual = new BytesRef();
            BytesRef expected = new BytesRef();
            for (int i = 0; i < numDocs; i++)
            {
                // check ord
                Assert.AreEqual(single.GetOrd(i), multi.GetOrd(i));
                // check ord value
                single.Get(i, expected);
                multi.Get(i, actual);
                Assert.AreEqual(expected, actual);
            }
            ir.Dispose();
            ir2.Dispose();
            dir.Dispose();
        }

        [Test]
        public virtual void TestSortedSet()
        {
            AssumeTrue("codec does not support SORTED_SET", DefaultCodecSupportsSortedSet());
            Directory dir = NewDirectory();

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);
            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            int numDocs = AtLeast(500);
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                int numValues = Random().Next(5);
                for (int j = 0; j < numValues; j++)
                {
                    doc.Add(new SortedSetDocValuesField("bytes", new BytesRef(TestUtil.RandomUnicodeString(Random()))));
                }
                iw.AddDocument(doc);
                if (Random().Next(17) == 0)
                {
                    iw.Commit();
                }
            }
            DirectoryReader ir = iw.Reader;
            iw.ForceMerge(1);
            DirectoryReader ir2 = iw.Reader;
            AtomicReader merged = GetOnlySegmentReader(ir2);
            iw.Dispose();

            SortedSetDocValues multi = MultiDocValues.GetSortedSetValues(ir, "bytes");
            SortedSetDocValues single = merged.GetSortedSetDocValues("bytes");
            if (multi == null)
            {
                Assert.IsNull(single);
            }
            else
            {
                Assert.AreEqual(single.ValueCount, multi.ValueCount);
                BytesRef actual = new BytesRef();
                BytesRef expected = new BytesRef();
                // check values
                for (long i = 0; i < single.ValueCount; i++)
                {
                    single.LookupOrd(i, expected);
                    multi.LookupOrd(i, actual);
                    Assert.AreEqual(expected, actual);
                }
                // check ord list
                for (int i = 0; i < numDocs; i++)
                {
                    single.SetDocument(i);
                    List<long> expectedList = new List<long>();
                    long ord;
                    while ((ord = single.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        expectedList.Add(ord);
                    }

                    multi.SetDocument(i);
                    int upto = 0;
                    while ((ord = multi.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        Assert.AreEqual(expectedList[upto], ord);
                        upto++;
                    }
                    Assert.AreEqual(expectedList.Count, upto);
                }
            }

            ir.Dispose();
            ir2.Dispose();
            dir.Dispose();
        }

        // tries to make more dups than testSortedSet
        [Test]
        public virtual void TestSortedSetWithDups()
        {
            AssumeTrue("codec does not support SORTED_SET", DefaultCodecSupportsSortedSet());
            Directory dir = NewDirectory();

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);
            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            int numDocs = AtLeast(500);
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                int numValues = Random().Next(5);
                for (int j = 0; j < numValues; j++)
                {
                    doc.Add(new SortedSetDocValuesField("bytes", new BytesRef(TestUtil.RandomSimpleString(Random(), 2))));
                }
                iw.AddDocument(doc);
                if (Random().Next(17) == 0)
                {
                    iw.Commit();
                }
            }
            DirectoryReader ir = iw.Reader;
            iw.ForceMerge(1);
            DirectoryReader ir2 = iw.Reader;
            AtomicReader merged = GetOnlySegmentReader(ir2);
            iw.Dispose();

            SortedSetDocValues multi = MultiDocValues.GetSortedSetValues(ir, "bytes");
            SortedSetDocValues single = merged.GetSortedSetDocValues("bytes");
            if (multi == null)
            {
                Assert.IsNull(single);
            }
            else
            {
                Assert.AreEqual(single.ValueCount, multi.ValueCount);
                BytesRef actual = new BytesRef();
                BytesRef expected = new BytesRef();
                // check values
                for (long i = 0; i < single.ValueCount; i++)
                {
                    single.LookupOrd(i, expected);
                    multi.LookupOrd(i, actual);
                    Assert.AreEqual(expected, actual);
                }
                // check ord list
                for (int i = 0; i < numDocs; i++)
                {
                    single.SetDocument(i);
                    List<long?> expectedList = new List<long?>();
                    long ord;
                    while ((ord = single.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        expectedList.Add(ord);
                    }

                    multi.SetDocument(i);
                    int upto = 0;
                    while ((ord = multi.NextOrd()) != SortedSetDocValues.NO_MORE_ORDS)
                    {
                        Assert.AreEqual((long)expectedList[upto], ord);
                        upto++;
                    }
                    Assert.AreEqual(expectedList.Count, upto);
                }
            }

            ir.Dispose();
            ir2.Dispose();
            dir.Dispose();
        }

        [Test]
        public virtual void TestDocsWithField()
        {
            AssumeTrue("codec does not support docsWithField", DefaultCodecSupportsDocsWithField());
            Directory dir = NewDirectory();

            IndexWriterConfig iwc = NewIndexWriterConfig(Random(), TEST_VERSION_CURRENT, null);
            iwc.SetMergePolicy(NewLogMergePolicy());
            RandomIndexWriter iw = new RandomIndexWriter(Random(), dir, iwc);

            int numDocs = AtLeast(500);
            for (int i = 0; i < numDocs; i++)
            {
                Document doc = new Document();
                // NOTE(review): Next(4) >= 0 is always true, so "numbers" is added to
                // every doc and never missing — looks like the intent was a random
                // chance of omission; TODO confirm against the upstream Java test.
                if (Random().Next(4) >= 0)
                {
                    doc.Add(new NumericDocValuesField("numbers", Random().NextLong()));
                }
                doc.Add(new NumericDocValuesField("numbersAlways", Random().NextLong()));
                iw.AddDocument(doc);
                if (Random().Next(17) == 0)
                {
                    iw.Commit();
                }
            }
            DirectoryReader ir = iw.Reader;
            iw.ForceMerge(1);
            DirectoryReader ir2 = iw.Reader;
            AtomicReader merged = GetOnlySegmentReader(ir2);
            iw.Dispose();

            IBits multi = MultiDocValues.GetDocsWithField(ir, "numbers");
            IBits single = merged.GetDocsWithField("numbers");
            if (multi == null)
            {
                Assert.IsNull(single);
            }
            else
            {
                Assert.AreEqual(single.Length, multi.Length);
                for (int i = 0; i < numDocs; i++)
                {
                    Assert.AreEqual(single.Get(i), multi.Get(i));
                }
            }

            multi = MultiDocValues.GetDocsWithField(ir, "numbersAlways");
            single = merged.GetDocsWithField("numbersAlways");
            Assert.AreEqual(single.Length, multi.Length);
            for (int i = 0; i < numDocs; i++)
            {
                Assert.AreEqual(single.Get(i), multi.Get(i));
            }
            ir.Dispose();
            ir2.Dispose();
            dir.Dispose();
        }
    }
}
