http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestStressIndexing2.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Index/TestStressIndexing2.cs b/src/Lucene.Net.Tests/Index/TestStressIndexing2.cs new file mode 100644 index 0000000..f073cb4 --- /dev/null +++ b/src/Lucene.Net.Tests/Index/TestStressIndexing2.cs @@ -0,0 +1,1064 @@ +using Lucene.Net.Analysis.TokenAttributes; +using Lucene.Net.Documents; +using Lucene.Net.Randomized.Generators; +using Lucene.Net.Support; +using Lucene.Net.Util; +using NUnit.Framework; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Threading; + +namespace Lucene.Net.Index +{ + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + using Directory = Lucene.Net.Store.Directory; + using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator; + using Document = Documents.Document; + using Field = Field; + using FieldType = FieldType; + using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer; + using TermQuery = Lucene.Net.Search.TermQuery; + using TextField = TextField; + + [TestFixture] + public class TestStressIndexing2 : LuceneTestCase + { + internal static int MaxFields = 4; + internal static int BigFieldSize = 10; + internal static bool SameFieldOrder = false; + internal static int MergeFactor = 3; + internal static int MaxBufferedDocs = 3; + internal static int Seed = 0; + + public sealed class YieldTestPoint : RandomIndexWriter.TestPoint + { + private readonly TestStressIndexing2 OuterInstance; + + public YieldTestPoint(TestStressIndexing2 outerInstance) + { + this.OuterInstance = outerInstance; + } + + public void Apply(string name) + { + // if (name.equals("startCommit")) { + if (Random().Next(4) == 2) + { + Thread.Sleep(0); + } + } + } + + // + [Test] + public virtual void TestRandomIWReader() + { + Directory dir = NewDirectory(); + + // TODO: verify equals using IW.getReader + DocsAndWriter dw = IndexRandomIWReader(5, 3, 100, dir); + DirectoryReader reader = dw.Writer.Reader; + dw.Writer.Commit(); + VerifyEquals(Random(), reader, dir, "id"); + reader.Dispose(); + dw.Writer.Dispose(); + dir.Dispose(); + } + + [Test] + public virtual void TestRandom() + { + Directory dir1 = NewDirectory(); + Directory dir2 = NewDirectory(); + // mergeFactor=2; maxBufferedDocs=2; Map docs = indexRandom(1, 3, 2, dir1); + int maxThreadStates = 1 + Random().Next(10); + bool doReaderPooling = Random().NextBoolean(); + IDictionary<string, Document> docs = IndexRandom(5, 3, 100, dir1, maxThreadStates, doReaderPooling); + IndexSerial(Random(), docs, dir2); + + // verifying verify + // verifyEquals(dir1, dir1, "id"); + // verifyEquals(dir2, dir2, "id"); + + VerifyEquals(dir1, dir2, "id"); + 
dir1.Dispose(); + dir2.Dispose(); + } + + [Test] + public virtual void TestMultiConfig() + { + // test lots of smaller different params together + + int num = AtLeast(3); + for (int i = 0; i < num; i++) // increase iterations for better testing + { + if (VERBOSE) + { + Console.WriteLine("\n\nTEST: top iter=" + i); + } + SameFieldOrder = Random().NextBoolean(); + MergeFactor = Random().Next(3) + 2; + MaxBufferedDocs = Random().Next(3) + 2; + int maxThreadStates = 1 + Random().Next(10); + bool doReaderPooling = Random().NextBoolean(); + Seed++; + + int nThreads = Random().Next(5) + 1; + int iter = Random().Next(5) + 1; + int range = Random().Next(20) + 1; + Directory dir1 = NewDirectory(); + Directory dir2 = NewDirectory(); + if (VERBOSE) + { + Console.WriteLine(" nThreads=" + nThreads + " iter=" + iter + " range=" + range + " doPooling=" + doReaderPooling + " maxThreadStates=" + maxThreadStates + " sameFieldOrder=" + SameFieldOrder + " mergeFactor=" + MergeFactor + " maxBufferedDocs=" + MaxBufferedDocs); + } + IDictionary<string, Document> docs = IndexRandom(nThreads, iter, range, dir1, maxThreadStates, doReaderPooling); + if (VERBOSE) + { + Console.WriteLine("TEST: index serial"); + } + IndexSerial(Random(), docs, dir2); + if (VERBOSE) + { + Console.WriteLine("TEST: verify"); + } + VerifyEquals(dir1, dir2, "id"); + dir1.Dispose(); + dir2.Dispose(); + } + } + + internal static Term IdTerm = new Term("id", ""); + internal IndexingThread[] Threads; + internal static IComparer<IIndexableField> fieldNameComparer = new ComparerAnonymousInnerClassHelper(); + + private class ComparerAnonymousInnerClassHelper : IComparer<IIndexableField> + { + public ComparerAnonymousInnerClassHelper() + { + } + + public virtual int Compare(IIndexableField o1, IIndexableField o2) + { + return o1.Name.CompareTo(o2.Name); + } + } + + // this test avoids using any extra synchronization in the multiple + // indexing threads to test that IndexWriter does correctly synchronize + // everything. 
+ + public class DocsAndWriter + { + internal IDictionary<string, Document> Docs; + internal IndexWriter Writer; + } + + public virtual DocsAndWriter IndexRandomIWReader(int nThreads, int iterations, int range, Directory dir) + { + IDictionary<string, Document> docs = new Dictionary<string, Document>(); + IndexWriter w = RandomIndexWriter.MockIndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.CREATE).SetRAMBufferSizeMB(0.1).SetMaxBufferedDocs(MaxBufferedDocs).SetMergePolicy(NewLogMergePolicy()), new YieldTestPoint(this)); + w.Commit(); + LogMergePolicy lmp = (LogMergePolicy)w.Config.MergePolicy; + lmp.NoCFSRatio = 0.0; + lmp.MergeFactor = MergeFactor; + /* + /// w.setMaxMergeDocs(Integer.MAX_VALUE); + /// w.setMaxFieldLength(10000); + /// w.SetRAMBufferSizeMB(1); + /// w.setMergeFactor(10); + */ + + Threads = new IndexingThread[nThreads]; + for (int i = 0; i < Threads.Length; i++) + { + IndexingThread th = new IndexingThread(this); + th.w = w; + th.@base = 1000000 * i; + th.Range = range; + th.Iterations = iterations; + Threads[i] = th; + } + + for (int i = 0; i < Threads.Length; i++) + { + Threads[i].Start(); + } + for (int i = 0; i < Threads.Length; i++) + { + Threads[i].Join(); + } + + // w.ForceMerge(1); + //w.Dispose(); + + for (int i = 0; i < Threads.Length; i++) + { + IndexingThread th = Threads[i]; + lock (th) + { + docs.PutAll(th.Docs); + } + } + + TestUtil.CheckIndex(dir); + DocsAndWriter dw = new DocsAndWriter(); + dw.Docs = docs; + dw.Writer = w; + return dw; + } + + public virtual IDictionary<string, Document> IndexRandom(int nThreads, int iterations, int range, Directory dir, int maxThreadStates, bool doReaderPooling) + { + IDictionary<string, Document> docs = new Dictionary<string, Document>(); + IndexWriter w = RandomIndexWriter.MockIndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetOpenMode(OpenMode.CREATE).SetRAMBufferSizeMB(0.1).SetMaxBufferedDocs(MaxBufferedDocs).SetIndexerThreadPool(new ThreadAffinityDocumentsWriterThreadPool(maxThreadStates)).SetReaderPooling(doReaderPooling).SetMergePolicy(NewLogMergePolicy()), new YieldTestPoint(this)); + LogMergePolicy lmp = (LogMergePolicy)w.Config.MergePolicy; + lmp.NoCFSRatio = 0.0; + lmp.MergeFactor = MergeFactor; + + Threads = new IndexingThread[nThreads]; + for (int i = 0; i < Threads.Length; i++) + { + IndexingThread th = new IndexingThread(this); + th.w = w; + th.@base = 1000000 * i; + th.Range = range; + th.Iterations = iterations; + Threads[i] = th; + } + + for (int i = 0; i < Threads.Length; i++) + { + Threads[i].Start(); + } + for (int i = 0; i < Threads.Length; i++) + { + Threads[i].Join(); + } + + //w.ForceMerge(1); + w.Dispose(); + + for (int i = 0; i < Threads.Length; i++) + { + IndexingThread th = Threads[i]; + lock (th) + { + docs.PutAll(th.Docs); + } + } + + //System.out.println("TEST: checkindex"); + TestUtil.CheckIndex(dir); + + return docs; + } + + /// <summary> + /// LUCENENET specific + /// Is non-static because NewIndexWriterConfig is no longer static. 
+ /// </summary> + public void IndexSerial(Random random, IDictionary<string, Document> docs, Directory dir) + { + IndexWriter w = new IndexWriter(dir, NewIndexWriterConfig(random, TEST_VERSION_CURRENT, new MockAnalyzer(random)).SetMergePolicy(NewLogMergePolicy())); + + // index all docs in a single thread + IEnumerator<Document> iter = docs.Values.GetEnumerator(); + while (iter.MoveNext()) + { + Document d = iter.Current; + List<IIndexableField> fields = new List<IIndexableField>(); + fields.AddRange(d.Fields); + // put fields in same order each time + fields.Sort(fieldNameComparer); + + Document d1 = new Document(); + for (int i = 0; i < fields.Count; i++) + { + d1.Add(fields[i]); + } + w.AddDocument(d1); + // System.out.println("indexing "+d1); + } + + w.Dispose(); + } + + public virtual void VerifyEquals(Random r, DirectoryReader r1, Directory dir2, string idField) + { + DirectoryReader r2 = DirectoryReader.Open(dir2); + VerifyEquals(r1, r2, idField); + r2.Dispose(); + } + + public virtual void VerifyEquals(Directory dir1, Directory dir2, string idField) + { + DirectoryReader r1 = DirectoryReader.Open(dir1); + DirectoryReader r2 = DirectoryReader.Open(dir2); + VerifyEquals(r1, r2, idField); + r1.Dispose(); + r2.Dispose(); + } + + private static void PrintDocs(DirectoryReader r) + { + foreach (AtomicReaderContext ctx in r.Leaves) + { + // TODO: improve this + AtomicReader sub = (AtomicReader)ctx.Reader; + IBits liveDocs = sub.LiveDocs; + Console.WriteLine(" " + ((SegmentReader)sub).SegmentInfo); + for (int docID = 0; docID < sub.MaxDoc; docID++) + { + Document doc = sub.Document(docID); + if (liveDocs == null || liveDocs.Get(docID)) + { + Console.WriteLine(" docID=" + docID + " id:" + doc.Get("id")); + } + else + { + Console.WriteLine(" DEL docID=" + docID + " id:" + doc.Get("id")); + } + } + } + } + + public virtual void VerifyEquals(DirectoryReader r1, DirectoryReader r2, string idField) + { + if (VERBOSE) + { + Console.WriteLine("\nr1 docs:"); + PrintDocs(r1); + Console.WriteLine("\nr2 docs:"); + PrintDocs(r2); + } + if (r1.NumDocs != r2.NumDocs) + { + Debug.Assert(false, "r1.NumDocs=" + r1.NumDocs + " vs r2.NumDocs=" + r2.NumDocs); + } + bool hasDeletes = !(r1.MaxDoc == r2.MaxDoc && r1.NumDocs == r1.MaxDoc); + + int[] r2r1 = new int[r2.MaxDoc]; // r2 id to r1 id mapping + + // create mapping from id2 space to id2 based on idField + Fields f1 = MultiFields.GetFields(r1); + if (f1 == null) + { + // make sure r2 is empty + Assert.IsNull(MultiFields.GetFields(r2)); + return; + } + Terms terms1 = f1.GetTerms(idField); + if (terms1 == null) + { + Assert.IsTrue(MultiFields.GetFields(r2) == null || MultiFields.GetFields(r2).GetTerms(idField) == null); + return; + } + TermsEnum termsEnum = terms1.GetIterator(null); + + IBits liveDocs1 = MultiFields.GetLiveDocs(r1); + IBits liveDocs2 = MultiFields.GetLiveDocs(r2); + + Fields fields = MultiFields.GetFields(r2); + if (fields == null) + { + // make sure r1 is in fact empty (eg has only all + // deleted docs): + IBits liveDocs = MultiFields.GetLiveDocs(r1); + DocsEnum docs = null; + while (termsEnum.Next() != null) + { + docs = TestUtil.Docs(Random(), termsEnum, liveDocs, docs, DocsEnum.FLAG_NONE); + while (docs.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) + { + Assert.Fail("r1 is not empty but r2 is"); + } + } + return; + } + Terms terms2 = fields.GetTerms(idField); + TermsEnum termsEnum2 = terms2.GetIterator(null); + + DocsEnum termDocs1 = null; + DocsEnum termDocs2 = null; + + while (true) + { + BytesRef term = termsEnum.Next(); + 
//System.out.println("TEST: match id term=" + term); + if (term == null) + { + break; + } + + termDocs1 = TestUtil.Docs(Random(), termsEnum, liveDocs1, termDocs1, DocsEnum.FLAG_NONE); + if (termsEnum2.SeekExact(term)) + { + termDocs2 = TestUtil.Docs(Random(), termsEnum2, liveDocs2, termDocs2, DocsEnum.FLAG_NONE); + } + else + { + termDocs2 = null; + } + + if (termDocs1.NextDoc() == DocIdSetIterator.NO_MORE_DOCS) + { + // this doc is deleted and wasn't replaced + Assert.IsTrue(termDocs2 == null || termDocs2.NextDoc() == DocIdSetIterator.NO_MORE_DOCS); + continue; + } + + int id1 = termDocs1.DocID; + Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs1.NextDoc()); + + Assert.IsTrue(termDocs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); + int id2 = termDocs2.DocID; + Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, termDocs2.NextDoc()); + + r2r1[id2] = id1; + + // verify stored fields are equivalent + try + { + VerifyEquals(r1.Document(id1), r2.Document(id2)); + } + catch (Exception t) + { + Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2 + " term=" + term); + Console.WriteLine(" d1=" + r1.Document(id1)); + Console.WriteLine(" d2=" + r2.Document(id2)); + throw t; + } + + try + { + // verify term vectors are equivalent + VerifyEquals(r1.GetTermVectors(id1), r2.GetTermVectors(id2)); + } + catch (Exception e) + { + Console.WriteLine("FAILED id=" + term + " id1=" + id1 + " id2=" + id2); + Fields tv1 = r1.GetTermVectors(id1); + Console.WriteLine(" d1=" + tv1); + if (tv1 != null) + { + DocsAndPositionsEnum dpEnum = null; + DocsEnum dEnum = null; + foreach (string field in tv1) + { + Console.WriteLine(" " + field + ":"); + Terms terms3 = tv1.GetTerms(field); + Assert.IsNotNull(terms3); + TermsEnum termsEnum3 = terms3.GetIterator(null); + BytesRef term2; + while ((term2 = termsEnum3.Next()) != null) + { + Console.WriteLine(" " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq); + dpEnum = termsEnum3.DocsAndPositions(null, dpEnum); + if (dpEnum != null) + { + Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); + int freq = dpEnum.Freq; + Console.WriteLine(" doc=" + dpEnum.DocID + " freq=" + freq); + for (int posUpto = 0; posUpto < freq; posUpto++) + { + Console.WriteLine(" pos=" + dpEnum.NextPosition()); + } + } + else + { + dEnum = TestUtil.Docs(Random(), termsEnum3, null, dEnum, DocsEnum.FLAG_FREQS); + Assert.IsNotNull(dEnum); + Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); + int freq = dEnum.Freq; + Console.WriteLine(" doc=" + dEnum.DocID + " freq=" + freq); + } + } + } + } + + Fields tv2 = r2.GetTermVectors(id2); + Console.WriteLine(" d2=" + tv2); + if (tv2 != null) + { + DocsAndPositionsEnum dpEnum = null; + DocsEnum dEnum = null; + foreach (string field in tv2) + { + Console.WriteLine(" " + field + ":"); + Terms terms3 = tv2.GetTerms(field); + Assert.IsNotNull(terms3); + TermsEnum termsEnum3 = terms3.GetIterator(null); + BytesRef term2; + while ((term2 = termsEnum3.Next()) != null) + { + Console.WriteLine(" " + term2.Utf8ToString() + ": freq=" + termsEnum3.TotalTermFreq); + dpEnum = termsEnum3.DocsAndPositions(null, dpEnum); + if (dpEnum != null) + { + Assert.IsTrue(dpEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); + int freq = dpEnum.Freq; + Console.WriteLine(" doc=" + dpEnum.DocID + " freq=" + freq); + for (int posUpto = 0; posUpto < freq; posUpto++) + { + Console.WriteLine(" pos=" + dpEnum.NextPosition()); + } + } + else + { + dEnum = TestUtil.Docs(Random(), termsEnum3, null, dEnum, DocsEnum.FLAG_FREQS); + 
Assert.IsNotNull(dEnum); + Assert.IsTrue(dEnum.NextDoc() != DocIdSetIterator.NO_MORE_DOCS); + int freq = dEnum.Freq; + Console.WriteLine(" doc=" + dEnum.DocID + " freq=" + freq); + } + } + } + } + + throw e; + } + } + + //System.out.println("TEST: done match id"); + + // Verify postings + //System.out.println("TEST: create te1"); + Fields fields1 = MultiFields.GetFields(r1); + IEnumerator<string> fields1Enum = fields1.GetEnumerator(); + Fields fields2 = MultiFields.GetFields(r2); + IEnumerator<string> fields2Enum = fields2.GetEnumerator(); + + string field1 = null, field2 = null; + TermsEnum termsEnum1 = null; + termsEnum2 = null; + DocsEnum docs1 = null, docs2 = null; + + // pack both doc and freq into single element for easy sorting + long[] info1 = new long[r1.NumDocs]; + long[] info2 = new long[r2.NumDocs]; + + for (; ; ) + { + BytesRef term1 = null, term2 = null; + + // iterate until we get some docs + int len1; + for (; ; ) + { + len1 = 0; + if (termsEnum1 == null) + { + if (!fields1Enum.MoveNext()) + { + break; + } + field1 = fields1Enum.Current; + Terms terms = fields1.GetTerms(field1); + if (terms == null) + { + continue; + } + termsEnum1 = terms.GetIterator(null); + } + term1 = termsEnum1.Next(); + if (term1 == null) + { + // no more terms in this field + termsEnum1 = null; + continue; + } + + //System.out.println("TEST: term1=" + term1); + docs1 = TestUtil.Docs(Random(), termsEnum1, liveDocs1, docs1, DocsEnum.FLAG_FREQS); + while (docs1.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) + { + int d = docs1.DocID; + int f = docs1.Freq; + info1[len1] = (((long)d) << 32) | f; + len1++; + } + if (len1 > 0) + { + break; + } + } + + // iterate until we get some docs + int len2; + for (; ; ) + { + len2 = 0; + if (termsEnum2 == null) + { + if (!fields2Enum.MoveNext()) + { + break; + } + field2 = fields2Enum.Current; + Terms terms = fields2.GetTerms(field2); + if (terms == null) + { + continue; + } + termsEnum2 = terms.GetIterator(null); + } + term2 = termsEnum2.Next(); + if (term2 == null) + { + // no more terms in this field + termsEnum2 = null; + continue; + } + + //System.out.println("TEST: term1=" + term1); + docs2 = TestUtil.Docs(Random(), termsEnum2, liveDocs2, docs2, DocsEnum.FLAG_FREQS); + while (docs2.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) + { + int d = r2r1[docs2.DocID]; + int f = docs2.Freq; + info2[len2] = (((long)d) << 32) | f; + len2++; + } + if (len2 > 0) + { + break; + } + } + + Assert.AreEqual(len1, len2); + if (len1 == 0) // no more terms + { + break; + } + + Assert.AreEqual(field1, field2); + Assert.IsTrue(term1.BytesEquals(term2)); + + if (!hasDeletes) + { + Assert.AreEqual(termsEnum1.DocFreq, termsEnum2.DocFreq); + } + + Assert.AreEqual(term1, term2, "len1=" + len1 + " len2=" + len2 + " deletes?=" + hasDeletes); + + // sort info2 to get it into ascending docid + Array.Sort(info2, 0, len2); + + // now compare + for (int i = 0; i < len1; i++) + { + Assert.AreEqual(info1[i], info2[i], "i=" + i + " len=" + len1 + " d1=" + ((long)((ulong)info1[i] >> 32)) + " f1=" + (info1[i] & int.MaxValue) + " d2=" + ((long)((ulong)info2[i] >> 32)) + " f2=" + (info2[i] & int.MaxValue) + " field=" + field1 + " term=" + term1.Utf8ToString()); + } + } + } + + public static void VerifyEquals(Document d1, Document d2) + { + IList<IIndexableField> ff1 = d1.Fields; + IList<IIndexableField> ff2 = d2.Fields; + + ff1.Sort(fieldNameComparer); + ff2.Sort(fieldNameComparer); + + Assert.AreEqual(ff1.Count, ff2.Count, ff1 + " : " + ff2); + + for (int i = 0; i < ff1.Count; i++) + { + IIndexableField f1 = 
ff1[i]; + IIndexableField f2 = ff2[i]; + if (f1.GetBinaryValue() != null) + { + Debug.Assert(f2.GetBinaryValue() != null); + } + else + { + string s1 = f1.GetStringValue(); + string s2 = f2.GetStringValue(); + Assert.AreEqual(s1, s2, ff1 + " : " + ff2); + } + } + } + + public static void VerifyEquals(Fields d1, Fields d2) + { + if (d1 == null) + { + Assert.IsTrue(d2 == null || d2.Count == 0); + return; + } + Assert.IsTrue(d2 != null); + + IEnumerator<string> fieldsEnum2 = d2.GetEnumerator(); + + foreach (string field1 in d1) + { + fieldsEnum2.MoveNext(); + string field2 = fieldsEnum2.Current; + Assert.AreEqual(field1, field2); + + Terms terms1 = d1.GetTerms(field1); + Assert.IsNotNull(terms1); + TermsEnum termsEnum1 = terms1.GetIterator(null); + + Terms terms2 = d2.GetTerms(field2); + Assert.IsNotNull(terms2); + TermsEnum termsEnum2 = terms2.GetIterator(null); + + DocsAndPositionsEnum dpEnum1 = null; + DocsAndPositionsEnum dpEnum2 = null; + DocsEnum dEnum1 = null; + DocsEnum dEnum2 = null; + + BytesRef term1; + while ((term1 = termsEnum1.Next()) != null) + { + BytesRef term2 = termsEnum2.Next(); + Assert.AreEqual(term1, term2); + Assert.AreEqual(termsEnum1.TotalTermFreq, termsEnum2.TotalTermFreq); + + dpEnum1 = termsEnum1.DocsAndPositions(null, dpEnum1); + dpEnum2 = termsEnum2.DocsAndPositions(null, dpEnum2); + if (dpEnum1 != null) + { + Assert.IsNotNull(dpEnum2); + int docID1 = dpEnum1.NextDoc(); + dpEnum2.NextDoc(); + // docIDs are not supposed to be equal + //int docID2 = dpEnum2.NextDoc(); + //Assert.AreEqual(docID1, docID2); + Assert.IsTrue(docID1 != DocIdSetIterator.NO_MORE_DOCS); + + int freq1 = dpEnum1.Freq; + int freq2 = dpEnum2.Freq; + Assert.AreEqual(freq1, freq2); + IOffsetAttribute offsetAtt1 = dpEnum1.Attributes.HasAttribute<IOffsetAttribute>() ? dpEnum1.Attributes.GetAttribute<IOffsetAttribute>() : null; + IOffsetAttribute offsetAtt2 = dpEnum2.Attributes.HasAttribute<IOffsetAttribute>() ? 
dpEnum2.Attributes.GetAttribute<IOffsetAttribute>() : null; + + if (offsetAtt1 != null) + { + Assert.IsNotNull(offsetAtt2); + } + else + { + Assert.IsNull(offsetAtt2); + } + + for (int posUpto = 0; posUpto < freq1; posUpto++) + { + int pos1 = dpEnum1.NextPosition(); + int pos2 = dpEnum2.NextPosition(); + Assert.AreEqual(pos1, pos2); + if (offsetAtt1 != null) + { + Assert.AreEqual(offsetAtt1.StartOffset, offsetAtt2.StartOffset); + Assert.AreEqual(offsetAtt1.EndOffset, offsetAtt2.EndOffset); + } + } + Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum1.NextDoc()); + Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dpEnum2.NextDoc()); + } + else + { + dEnum1 = TestUtil.Docs(Random(), termsEnum1, null, dEnum1, DocsEnum.FLAG_FREQS); + dEnum2 = TestUtil.Docs(Random(), termsEnum2, null, dEnum2, DocsEnum.FLAG_FREQS); + Assert.IsNotNull(dEnum1); + Assert.IsNotNull(dEnum2); + int docID1 = dEnum1.NextDoc(); + dEnum2.NextDoc(); + // docIDs are not supposed to be equal + //int docID2 = dEnum2.NextDoc(); + //Assert.AreEqual(docID1, docID2); + Assert.IsTrue(docID1 != DocIdSetIterator.NO_MORE_DOCS); + int freq1 = dEnum1.Freq; + int freq2 = dEnum2.Freq; + Assert.AreEqual(freq1, freq2); + Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dEnum1.NextDoc()); + Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, dEnum2.NextDoc()); + } + } + + Assert.IsNull(termsEnum2.Next()); + } + Assert.IsFalse(fieldsEnum2.MoveNext()); + } + + internal class IndexingThread : ThreadClass + { + private readonly TestStressIndexing2 OuterInstance; + + public IndexingThread(TestStressIndexing2 outerInstance) + { + this.OuterInstance = outerInstance; + } + + internal IndexWriter w; + internal int @base; + internal int Range; + internal int Iterations; + internal IDictionary<string, Document> Docs = new Dictionary<string, Document>(); + internal Random r; + + public virtual int NextInt(int lim) + { + return r.Next(lim); + } + + // start is inclusive and end is exclusive + public virtual int NextInt(int start, int end) + { + return start + r.Next(end - start); + } + + internal char[] Buffer = new char[100]; + + internal virtual int AddUTF8Token(int start) + { + int end = start + NextInt(20); + if (Buffer.Length < 1 + end) + { + char[] newBuffer = new char[(int)((1 + end) * 1.25)]; + Array.Copy(Buffer, 0, newBuffer, 0, Buffer.Length); + Buffer = newBuffer; + } + + for (int i = start; i < end; i++) + { + int t = NextInt(5); + if (0 == t && i < end - 1) + { + // Make a surrogate pair + // High surrogate + Buffer[i++] = (char)NextInt(0xd800, 0xdc00); + // Low surrogate + Buffer[i] = (char)NextInt(0xdc00, 0xe000); + } + else if (t <= 1) + { + Buffer[i] = (char)NextInt(0x80); + } + else if (2 == t) + { + Buffer[i] = (char)NextInt(0x80, 0x800); + } + else if (3 == t) + { + Buffer[i] = (char)NextInt(0x800, 0xd800); + } + else if (4 == t) + { + Buffer[i] = (char)NextInt(0xe000, 0xffff); + } + } + Buffer[end] = ' '; + return 1 + end; + } + + public virtual string GetString(int nTokens) + { + nTokens = nTokens != 0 ? nTokens : r.Next(4) + 1; + + // Half the time make a random UTF8 string + if (r.NextBoolean()) + { + return GetUTF8String(nTokens); + } + + // avoid StringBuffer because it adds extra synchronization. 
+ char[] arr = new char[nTokens * 2]; + for (int i = 0; i < nTokens; i++) + { + arr[i * 2] = (char)('A' + r.Next(10)); + arr[i * 2 + 1] = ' '; + } + return new string(arr); + } + + public virtual string GetUTF8String(int nTokens) + { + int upto = 0; + Arrays.Fill(Buffer, (char)0); + for (int i = 0; i < nTokens; i++) + { + upto = AddUTF8Token(upto); + } + return new string(Buffer, 0, upto); + } + + public virtual string IdString + { + get + { + return Convert.ToString(@base + NextInt(Range)); + } + } + + public virtual void IndexDoc() + { + Document d = new Document(); + + FieldType customType1 = new FieldType(TextField.TYPE_STORED); + customType1.IsTokenized = false; + customType1.OmitNorms = true; + + List<Field> fields = new List<Field>(); + string idString = IdString; + Field idField = OuterInstance.NewField("id", idString, customType1); + fields.Add(idField); + + int nFields = NextInt(MaxFields); + for (int i = 0; i < nFields; i++) + { + FieldType customType = new FieldType(); + switch (NextInt(4)) + { + case 0: + break; + + case 1: + customType.StoreTermVectors = true; + break; + + case 2: + customType.StoreTermVectors = true; + customType.StoreTermVectorPositions = true; + break; + + case 3: + customType.StoreTermVectors = true; + customType.StoreTermVectorOffsets = true; + break; + } + + switch (NextInt(4)) + { + case 0: + customType.IsStored = true; + customType.OmitNorms = true; + customType.IsIndexed = true; + fields.Add(OuterInstance.NewField("f" + NextInt(100), GetString(1), customType)); + break; + + case 1: + customType.IsIndexed = true; + customType.IsTokenized = true; + fields.Add(OuterInstance.NewField("f" + NextInt(100), GetString(0), customType)); + break; + + case 2: + customType.IsStored = true; + customType.StoreTermVectors = false; + customType.StoreTermVectorOffsets = false; + customType.StoreTermVectorPositions = false; + fields.Add(OuterInstance.NewField("f" + NextInt(100), GetString(0), customType)); + break; + + case 3: + customType.IsStored = true; + customType.IsIndexed = true; + customType.IsTokenized = true; + fields.Add(OuterInstance.NewField("f" + NextInt(100), GetString(BigFieldSize), customType)); + break; + } + } + + if (SameFieldOrder) + { + fields.Sort(fieldNameComparer); + } + else + { + // random placement of id field also + Collections.Swap(fields, NextInt(fields.Count), 0); + } + + for (int i = 0; i < fields.Count; i++) + { + d.Add(fields[i]); + } + if (VERBOSE) + { + Console.WriteLine(Thread.CurrentThread.Name + ": indexing id:" + idString); + } + w.UpdateDocument(new Term("id", idString), d); + //System.out.println(Thread.currentThread().getName() + ": indexing "+d); + Docs[idString] = d; + } + + public virtual void DeleteDoc() + { + string idString = IdString; + if (VERBOSE) + { + Console.WriteLine(Thread.CurrentThread.Name + ": del id:" + idString); + } + w.DeleteDocuments(new Term("id", idString)); + Docs.Remove(idString); + } + + public virtual void DeleteByQuery() + { + string idString = IdString; + if (VERBOSE) + { + Console.WriteLine(Thread.CurrentThread.Name + ": del query id:" + idString); + } + w.DeleteDocuments(new TermQuery(new Term("id", idString))); + Docs.Remove(idString); + } + + public override void Run() + { + try + { + r = new Random(@base + Range + Seed); + for (int i = 0; i < Iterations; i++) + { + int what = NextInt(100); + if (what < 5) + { + DeleteDoc(); + } + else if (what < 10) + { + DeleteByQuery(); + } + else + { + IndexDoc(); + } + } + } + catch (Exception e) + { + Console.WriteLine(e.ToString()); + 
Console.Write(e.StackTrace); + Assert.Fail(e.ToString()); + } + + // note: this lock pairs with the lock(th) taken around docs.PutAll(th.Docs) in + // IndexRandom/IndexRandomIWReader, acting as a memory barrier so the coordinating + // thread sees this thread's final updates to Docs after Join. + lock (this) + { + int dummy = Docs.Count; + } + } + } +} \ No newline at end of file
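Aside: the VerifyEquals postings check in TestStressIndexing2 above packs each (docID, freq) pair into a single long ("pack both doc and freq into single element for easy sorting"): docID goes in the high 32 bits and freq in the low 32, so a plain Array.Sort orders postings by docID while carrying each freq along. The following standalone sketch (illustrative only, not part of this commit) shows the same packing and unpacking, assuming non-negative docIDs and freqs as postings guarantee:

using System;

public static class PackedPostingsSketch
{
    public static void Main()
    {
        // (docID, freq) pairs collected in arbitrary order, e.g. from two readers
        (int Doc, int Freq)[] postings = { (7, 2), (3, 5), (12, 1) };

        long[] packed = new long[postings.Length];
        for (int i = 0; i < postings.Length; i++)
        {
            // same packing as info1[len1] = (((long)d) << 32) | f in the test
            packed[i] = (((long)postings[i].Doc) << 32) | (uint)postings[i].Freq;
        }

        Array.Sort(packed); // ascending docID order, since docID sits in the high bits

        foreach (long p in packed)
        {
            int doc = (int)((ulong)p >> 32);    // unsigned shift recovers the docID
            int freq = (int)(p & 0xFFFFFFFFL);  // low 32 bits recover the freq
            Console.WriteLine("doc=" + doc + " freq=" + freq);
        }
    }
}

The same unpacking appears in the test's failure message, where it splits d1/f1 and d2/f2 back out of info1[i] and info2[i] before asserting the two postings arrays match.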
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestStressNRT.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Index/TestStressNRT.cs b/src/Lucene.Net.Tests/Index/TestStressNRT.cs new file mode 100644 index 0000000..b9d52cb --- /dev/null +++ b/src/Lucene.Net.Tests/Index/TestStressNRT.cs @@ -0,0 +1,530 @@ +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Diagnostics; +using System.Threading; +using Lucene.Net.Documents; + +namespace Lucene.Net.Index +{ + using Lucene.Net.Randomized.Generators; + using Lucene.Net.Support; + using NUnit.Framework; + using Directory = Lucene.Net.Store.Directory; + using Document = Documents.Document; + using FieldType = FieldType; + using IndexSearcher = Lucene.Net.Search.IndexSearcher; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer; + using Query = Lucene.Net.Search.Query; + using ScoreDoc = Lucene.Net.Search.ScoreDoc; + using TermQuery = Lucene.Net.Search.TermQuery; + using TestUtil = Lucene.Net.Util.TestUtil; + using TopDocs = Lucene.Net.Search.TopDocs; + + [TestFixture] + public class TestStressNRT : LuceneTestCase + { + internal volatile DirectoryReader Reader; + + internal readonly ConcurrentDictionary<int, long> Model = new ConcurrentDictionary<int, long>(); + internal IDictionary<int, long> CommittedModel = new Dictionary<int, long>(); + internal long SnapshotCount; + internal long CommittedModelClock; + internal volatile int LastId; + internal readonly string Field = "val_l"; + internal object[] SyncArr; + + private void InitModel(int ndocs) + { + SnapshotCount = 0; + CommittedModelClock = 0; + LastId = 0; + + SyncArr = new object[ndocs]; + + for (int i = 0; i < ndocs; i++) + { + Model[i] = -1L; + SyncArr[i] = new object(); + } + CommittedModel.PutAll(Model); + } + + [Test] + public virtual void Test() + { + // update variables + int commitPercent = Random().Next(20); + int softCommitPercent = Random().Next(100); // what percent of the commits are soft + int deletePercent = Random().Next(50); + int deleteByQueryPercent = Random().Next(25); + int ndocs = AtLeast(50); + int nWriteThreads = TestUtil.NextInt(Random(), 1, TEST_NIGHTLY ? 10 : 5); + int maxConcurrentCommits = TestUtil.NextInt(Random(), 1, TEST_NIGHTLY ? 10 : 5); // number of committers at a time... 
needed if we want to avoid commit errors due to exceeding the max + + bool tombstones = Random().NextBoolean(); + + // query variables + AtomicInt64 operations = new AtomicInt64(AtLeast(10000)); // number of query operations to perform in total + + int nReadThreads = TestUtil.NextInt(Random(), 1, TEST_NIGHTLY ? 10 : 5); + InitModel(ndocs); + + FieldType storedOnlyType = new FieldType(); + storedOnlyType.IsStored = true; + + if (VERBOSE) + { + Console.WriteLine("\n"); + Console.WriteLine("TEST: commitPercent=" + commitPercent); + Console.WriteLine("TEST: softCommitPercent=" + softCommitPercent); + Console.WriteLine("TEST: deletePercent=" + deletePercent); + Console.WriteLine("TEST: deleteByQueryPercent=" + deleteByQueryPercent); + Console.WriteLine("TEST: ndocs=" + ndocs); + Console.WriteLine("TEST: nWriteThreads=" + nWriteThreads); + Console.WriteLine("TEST: nReadThreads=" + nReadThreads); + Console.WriteLine("TEST: maxConcurrentCommits=" + maxConcurrentCommits); + Console.WriteLine("TEST: tombstones=" + tombstones); + Console.WriteLine("TEST: operations=" + operations); + Console.WriteLine("\n"); + } + + AtomicInt32 numCommitting = new AtomicInt32(); + + IList<ThreadClass> threads = new List<ThreadClass>(); + + Directory dir = NewDirectory(); + + RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random()))); + writer.DoRandomForceMergeAssert = false; + writer.Commit(); + Reader = DirectoryReader.Open(dir); + + for (int i = 0; i < nWriteThreads; i++) + { + ThreadClass thread = new ThreadAnonymousInnerClassHelper(this, "WRITER" + i, commitPercent, softCommitPercent, deletePercent, deleteByQueryPercent, ndocs, maxConcurrentCommits, tombstones, operations, storedOnlyType, numCommitting, writer); + + threads.Add(thread); + } + + for (int i = 0; i < nReadThreads; i++) + { + ThreadClass thread = new ThreadAnonymousInnerClassHelper2(this, "READER" + i, ndocs, tombstones, operations); + + threads.Add(thread); + } + + foreach (ThreadClass thread in threads) + { + thread.Start(); + } + + foreach (ThreadClass thread in threads) + { + thread.Join(); + } + + writer.Dispose(); + if (VERBOSE) + { + Console.WriteLine("TEST: close reader=" + Reader); + } + Reader.Dispose(); + dir.Dispose(); + } + + private class ThreadAnonymousInnerClassHelper : ThreadClass + { + private readonly TestStressNRT OuterInstance; + + private int CommitPercent; + private int SoftCommitPercent; + private int DeletePercent; + private int DeleteByQueryPercent; + private int Ndocs; + private int MaxConcurrentCommits; + private bool Tombstones; + private AtomicInt64 Operations; + private FieldType StoredOnlyType; + private AtomicInt32 NumCommitting; + private RandomIndexWriter Writer; + + public ThreadAnonymousInnerClassHelper(TestStressNRT outerInstance, string str, int commitPercent, int softCommitPercent, int deletePercent, int deleteByQueryPercent, int ndocs, int maxConcurrentCommits, bool tombstones, AtomicInt64 operations, FieldType storedOnlyType, AtomicInt32 numCommitting, RandomIndexWriter writer) + : base(str) + { + this.OuterInstance = outerInstance; + this.CommitPercent = commitPercent; + this.SoftCommitPercent = softCommitPercent; + this.DeletePercent = deletePercent; + this.DeleteByQueryPercent = deleteByQueryPercent; + this.Ndocs = ndocs; + this.MaxConcurrentCommits = maxConcurrentCommits; + this.Tombstones = tombstones; + this.Operations = operations; + this.StoredOnlyType = storedOnlyType; + this.NumCommitting = numCommitting; + this.Writer = 
writer; + rand = new Random(Random().Next()); + } + + internal Random rand; + + public override void Run() + { + try + { + while (Operations.Get() > 0) + { + int oper = rand.Next(100); + + if (oper < CommitPercent) + { + if (NumCommitting.IncrementAndGet() <= MaxConcurrentCommits) + { + IDictionary<int, long> newCommittedModel; + long version; + DirectoryReader oldReader; + + lock (OuterInstance) + { + newCommittedModel = new Dictionary<int, long>(OuterInstance.Model); // take a snapshot + version = OuterInstance.SnapshotCount++; + oldReader = OuterInstance.Reader; + oldReader.IncRef(); // increment the reference since we will use this for reopening + } + + DirectoryReader newReader; + if (rand.Next(100) < SoftCommitPercent) + { + // assertU(h.Commit("softCommit","true")); + if (Random().NextBoolean()) + { + if (VERBOSE) + { + Console.WriteLine("TEST: " + Thread.CurrentThread.Name + ": call writer.getReader"); + } + newReader = Writer.GetReader(true); + } + else + { + if (VERBOSE) + { + Console.WriteLine("TEST: " + Thread.CurrentThread.Name + ": reopen reader=" + oldReader + " version=" + version); + } + newReader = DirectoryReader.OpenIfChanged(oldReader, Writer.w, true); + } + } + else + { + // assertU(commit()); + if (VERBOSE) + { + Console.WriteLine("TEST: " + Thread.CurrentThread.Name + ": commit+reopen reader=" + oldReader + " version=" + version); + } + Writer.Commit(); + if (VERBOSE) + { + Console.WriteLine("TEST: " + Thread.CurrentThread.Name + ": now reopen after commit"); + } + newReader = DirectoryReader.OpenIfChanged(oldReader); + } + + // Code below assumes newReader comes w/ + // extra ref: + if (newReader == null) + { + oldReader.IncRef(); + newReader = oldReader; + } + + oldReader.DecRef(); + + lock (OuterInstance) + { + // install the new reader if it's newest (and check the current version since another reader may have already been installed) + //System.out.println(Thread.currentThread().getName() + ": newVersion=" + newReader.getVersion()); + Debug.Assert(newReader.RefCount > 0); + Debug.Assert(OuterInstance.Reader.RefCount > 0); + if (newReader.Version > OuterInstance.Reader.Version) + { + if (VERBOSE) + { + Console.WriteLine("TEST: " + Thread.CurrentThread.Name + ": install new reader=" + newReader); + } + OuterInstance.Reader.DecRef(); + OuterInstance.Reader = newReader; + + // Silly: forces fieldInfos to be + // loaded so we don't hit IOE on later + // reader.toString + newReader.ToString(); + + // install this snapshot only if it's newer than the current one + if (version >= OuterInstance.CommittedModelClock) + { + if (VERBOSE) + { + Console.WriteLine("TEST: " + Thread.CurrentThread.Name + ": install new model version=" + version); + } + OuterInstance.CommittedModel = newCommittedModel; + OuterInstance.CommittedModelClock = version; + } + else + { + if (VERBOSE) + { + Console.WriteLine("TEST: " + Thread.CurrentThread.Name + ": skip install new model version=" + version); + } + } + } + else + { + // if the same reader, don't decRef. 
+ if (VERBOSE) + { + Console.WriteLine("TEST: " + Thread.CurrentThread.Name + ": skip install new reader=" + newReader); + } + newReader.DecRef(); + } + } + } + NumCommitting.DecrementAndGet(); + } + else + { + int id = rand.Next(Ndocs); + object sync = OuterInstance.SyncArr[id]; + + // set the lastId before we actually change it sometimes to try and + // uncover more race conditions between writing and reading + bool before = Random().NextBoolean(); + if (before) + { + OuterInstance.LastId = id; + } + + // We can't concurrently update the same document and retain our invariants of increasing values + // since we can't guarantee what order the updates will be executed. + lock (sync) + { + long val = OuterInstance.Model[id]; + long nextVal = Math.Abs(val) + 1; + + if (oper < CommitPercent + DeletePercent) + { + // assertU("<delete><id>" + id + "</id></delete>"); + + // add tombstone first + if (Tombstones) + { + Document d = new Document(); + d.Add(OuterInstance.NewStringField("id", "-" + Convert.ToString(id), Documents.Field.Store.YES)); + d.Add(OuterInstance.NewField(OuterInstance.Field, Convert.ToString(nextVal), StoredOnlyType)); + Writer.UpdateDocument(new Term("id", "-" + Convert.ToString(id)), d); + } + + if (VERBOSE) + { + Console.WriteLine("TEST: " + Thread.CurrentThread.Name + ": term delDocs id:" + id + " nextVal=" + nextVal); + } + Writer.DeleteDocuments(new Term("id", Convert.ToString(id))); + OuterInstance.Model[id] = -nextVal; + } + else if (oper < CommitPercent + DeletePercent + DeleteByQueryPercent) + { + //assertU("<delete><query>id:" + id + "</query></delete>"); + + // add tombstone first + if (Tombstones) + { + Document d = new Document(); + d.Add(OuterInstance.NewStringField("id", "-" + Convert.ToString(id), Documents.Field.Store.YES)); + d.Add(OuterInstance.NewField(OuterInstance.Field, Convert.ToString(nextVal), StoredOnlyType)); + Writer.UpdateDocument(new Term("id", "-" + Convert.ToString(id)), d); + } + + if (VERBOSE) + { + Console.WriteLine("TEST: " + Thread.CurrentThread.Name + ": query delDocs id:" + id + " nextVal=" + nextVal); + } + Writer.DeleteDocuments(new TermQuery(new Term("id", Convert.ToString(id)))); + OuterInstance.Model[id] = -nextVal; + } + else + { + // assertU(adoc("id",Integer.toString(id), field, Long.toString(nextVal))); + Document d = new Document(); + d.Add(OuterInstance.NewStringField("id", Convert.ToString(id), Documents.Field.Store.YES)); + d.Add(OuterInstance.NewField(OuterInstance.Field, Convert.ToString(nextVal), StoredOnlyType)); + if (VERBOSE) + { + Console.WriteLine("TEST: " + Thread.CurrentThread.Name + ": u id:" + id + " val=" + nextVal); + } + Writer.UpdateDocument(new Term("id", Convert.ToString(id)), d); + if (Tombstones) + { + // remove tombstone after new addition (this should be optional?) 
+ Writer.DeleteDocuments(new Term("id", "-" + Convert.ToString(id))); + } + OuterInstance.Model[id] = nextVal; + } + } + + if (!before) + { + OuterInstance.LastId = id; + } + } + } + } + catch (Exception e) + { + Console.WriteLine(Thread.CurrentThread.Name + ": FAILED: unexpected exception"); + Console.WriteLine(e.StackTrace); + throw new Exception(e.Message, e); + } + } + } + + private class ThreadAnonymousInnerClassHelper2 : ThreadClass + { + private readonly TestStressNRT OuterInstance; + + private int Ndocs; + private bool Tombstones; + private AtomicInt64 Operations; + + public ThreadAnonymousInnerClassHelper2(TestStressNRT outerInstance, string str, int ndocs, bool tombstones, AtomicInt64 operations) + : base(str) + { + this.OuterInstance = outerInstance; + this.Ndocs = ndocs; + this.Tombstones = tombstones; + this.Operations = operations; + rand = new Random(Random().Next()); + } + + internal Random rand; + + public override void Run() + { + try + { + IndexReader lastReader = null; + IndexSearcher lastSearcher = null; + + while (Operations.DecrementAndGet() >= 0) + { + // bias toward a recently changed doc + int id = rand.Next(100) < 25 ? OuterInstance.LastId : rand.Next(Ndocs); + + // when indexing, we update the index, then the model + // so when querying, we should first check the model, and then the index + + long val; + DirectoryReader r; + lock (OuterInstance) + { + val = OuterInstance.CommittedModel[id]; + r = OuterInstance.Reader; + r.IncRef(); + } + + if (VERBOSE) + { + Console.WriteLine("TEST: " + Thread.CurrentThread.Name + ": s id=" + id + " val=" + val + " r=" + r.Version); + } + + // sreq = req("wt","json", "q","id:"+Integer.toString(id), "omitHeader","true"); + IndexSearcher searcher; + if (r == lastReader) + { + // Just re-use lastSearcher, else + // newSearcher may create too many thread + // pools (ExecutorService): + searcher = lastSearcher; + } + else + { + searcher = OuterInstance.NewSearcher(r); + lastReader = r; + lastSearcher = searcher; + } + Query q = new TermQuery(new Term("id", Convert.ToString(id))); + TopDocs results = searcher.Search(q, 10); + + if (results.TotalHits == 0 && Tombstones) + { + // if we couldn't find the doc, look for its tombstone + q = new TermQuery(new Term("id", "-" + Convert.ToString(id))); + results = searcher.Search(q, 1); + if (results.TotalHits == 0) + { + if (val == -1L) + { + // expected... 
no doc was added yet + r.DecRef(); + continue; + } + Assert.Fail("No documents or tombstones found for id " + id + ", expected at least " + val + " reader=" + r); + } + } + + if (results.TotalHits == 0 && !Tombstones) + { + // nothing to do - we can't tell anything from a deleted doc without tombstones + } + else + { + // we should have found the document, or its tombstone + if (results.TotalHits != 1) + { + Console.WriteLine("FAIL: hits id:" + id + " val=" + val); + foreach (ScoreDoc sd in results.ScoreDocs) + { + Document doc = r.Document(sd.Doc); + Console.WriteLine(" docID=" + sd.Doc + " id:" + doc.Get("id") + " foundVal=" + doc.Get(OuterInstance.Field)); + } + Assert.Fail("id=" + id + " reader=" + r + " totalHits=" + results.TotalHits); + } + Document doc_ = searcher.Doc(results.ScoreDocs[0].Doc); + long foundVal = Convert.ToInt64(doc_.Get(OuterInstance.Field)); + if (foundVal < Math.Abs(val)) + { + Assert.Fail("foundVal=" + foundVal + " val=" + val + " id=" + id + " reader=" + r); + } + } + + r.DecRef(); + } + } + catch (Exception e) + { + Operations.Set((int)-1L); + Console.WriteLine(Thread.CurrentThread.Name + ": FAILED: unexpected exception"); + Console.WriteLine(e.StackTrace); + throw new Exception(e.Message, e); + } + } + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestSumDocFreq.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Index/TestSumDocFreq.cs b/src/Lucene.Net.Tests/Index/TestSumDocFreq.cs new file mode 100644 index 0000000..66f88c2 --- /dev/null +++ b/src/Lucene.Net.Tests/Index/TestSumDocFreq.cs @@ -0,0 +1,112 @@ +using System; +using Lucene.Net.Documents; + +namespace Lucene.Net.Index +{ + using NUnit.Framework; + using Directory = Lucene.Net.Store.Directory; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + using Document = Documents.Document; + using Field = Field; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + using TestUtil = Lucene.Net.Util.TestUtil; + + /// <summary> + /// Tests <seealso cref="Terms#getSumDocFreq()"/> + /// @lucene.experimental + /// </summary> + [TestFixture] + public class TestSumDocFreq : LuceneTestCase + { + [Test] + public virtual void TestSumDocFreq_Mem() + { + int numDocs = AtLeast(500); + + Directory dir = NewDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); + + Document doc = new Document(); + Field id = NewStringField("id", "", Field.Store.NO); + Field field1 = NewTextField("foo", "", Field.Store.NO); + Field field2 = NewTextField("bar", "", Field.Store.NO); + doc.Add(id); + doc.Add(field1); + doc.Add(field2); + for (int i = 0; i < numDocs; i++) + { + id.SetStringValue("" + i); + char ch1 = (char)TestUtil.NextInt(Random(), 'a', 'z'); + char ch2 = (char)TestUtil.NextInt(Random(), 'a', 'z'); + field1.SetStringValue("" + ch1 + " " + ch2); + ch1 = (char)TestUtil.NextInt(Random(), 'a', 'z'); + ch2 = (char)TestUtil.NextInt(Random(), 'a', 'z'); + field2.SetStringValue("" + ch1 + " " + ch2); + writer.AddDocument(doc); + } + + IndexReader ir = writer.Reader; + + AssertSumDocFreq(ir); + ir.Dispose(); + + int numDeletions = AtLeast(20); + for (int i = 0; i < numDeletions; i++) + { + writer.DeleteDocuments(new Term("id", "" + Random().Next(numDocs))); + } + writer.ForceMerge(1); + writer.Dispose(); + + ir = DirectoryReader.Open(dir); + AssertSumDocFreq(ir); + ir.Dispose(); + dir.Dispose(); + } + + private void AssertSumDocFreq(IndexReader ir) + { + // compute sumDocFreq across all fields + Fields fields = MultiFields.GetFields(ir); + + foreach (string f in fields) + { + Terms terms = fields.GetTerms(f); + long sumDocFreq = terms.SumDocFreq; + if (sumDocFreq == -1) + { + if (VERBOSE) + { + Console.WriteLine("skipping field: " + f + ", codec does not support sumDocFreq"); + } + continue; + } + + long computedSumDocFreq = 0; + TermsEnum termsEnum = terms.GetIterator(null); + while (termsEnum.Next() != null) + { + computedSumDocFreq += termsEnum.DocFreq; + } + Assert.AreEqual(computedSumDocFreq, sumDocFreq); + } + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestTaskMergeSchedulerExternal.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Index/TestTaskMergeSchedulerExternal.cs b/src/Lucene.Net.Tests/Index/TestTaskMergeSchedulerExternal.cs new file mode 100644 index 0000000..2a38dae --- /dev/null +++ b/src/Lucene.Net.Tests/Index/TestTaskMergeSchedulerExternal.cs @@ -0,0 +1,146 @@ +using Lucene.Net.Documents; +using NUnit.Framework; +using System; +using System.IO; + +namespace Lucene.Net.Tests +{ + using Index; + using Util; + using Directory = Lucene.Net.Store.Directory; + using Document = Documents.Document; + using Field = Field; + using IndexWriter = Lucene.Net.Index.IndexWriter; + using IndexWriterConfig = Lucene.Net.Index.IndexWriterConfig; + using LogMergePolicy = Lucene.Net.Index.LogMergePolicy; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + using MergePolicy = Lucene.Net.Index.MergePolicy; + using MergeScheduler = Lucene.Net.Index.MergeScheduler; + using MergeTrigger = Lucene.Net.Index.MergeTrigger; + using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer; + using MockDirectoryWrapper = Lucene.Net.Store.MockDirectoryWrapper; + using RAMDirectory = 
Lucene.Net.Store.RAMDirectory; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + /// <summary> + /// Holds tests cases to verify external APIs are accessible + /// while not being in Lucene.Net.Index package. + /// </summary> + public class TestTaskMergeSchedulerExternal : LuceneTestCase + { + internal volatile bool MergeCalled; + internal volatile bool ExcCalled; + + private class MyMergeScheduler : TaskMergeScheduler + { + private readonly TestTaskMergeSchedulerExternal OuterInstance; + + public MyMergeScheduler(TestTaskMergeSchedulerExternal outerInstance) + { + this.OuterInstance = outerInstance; + } + + protected override void HandleMergeException(Exception t) + { + OuterInstance.ExcCalled = true; + } + + public override void Merge(IndexWriter writer, MergeTrigger trigger, bool newMergesFound) + { + OuterInstance.MergeCalled = true; + base.Merge(writer, trigger, newMergesFound); + } + } + + private class FailOnlyOnMerge : MockDirectoryWrapper.Failure + { + public override void Eval(MockDirectoryWrapper dir) + { + if (StackTraceHelper.DoesStackTraceContainMethod("DoMerge")) + { + throw new IOException("now failing during merge"); + } + } + } + + [Test] + public void TestSubclassTaskMergeScheduler() + { + MockDirectoryWrapper dir = NewMockDirectory(); + dir.FailOn(new FailOnlyOnMerge()); + + Document doc = new Document(); + Field idField = NewStringField("id", "", Field.Store.YES); + doc.Add(idField); + + IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMergeScheduler(new MyMergeScheduler(this)).SetMaxBufferedDocs(2).SetRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH).SetMergePolicy(NewLogMergePolicy())); + LogMergePolicy logMP = (LogMergePolicy)writer.Config.MergePolicy; + logMP.MergeFactor = 10; + for (int i = 0; i < 20; i++) + { + writer.AddDocument(doc); + } + + ((MyMergeScheduler)writer.Config.MergeScheduler).Sync(); + writer.Dispose(); + + Assert.IsTrue(MergeCalled); + dir.Dispose(); + } + + private class ReportingMergeScheduler : MergeScheduler + { + public override void Merge(IndexWriter writer, MergeTrigger trigger, bool newMergesFound) + { + MergePolicy.OneMerge merge = null; + while ((merge = writer.NextMerge()) != null) + { + if (VERBOSE) + { + Console.WriteLine("executing merge " + merge.SegString(writer.Directory)); + } + writer.Merge(merge); + } + } + + protected override void Dispose(bool disposing) + { + } + } + + [Test] + public void TestCustomMergeScheduler() + { + // we don't really need to execute anything, just to make sure the custom MS + // compiles. But ensure that it can be used as well, e.g., no other hidden + // dependencies or something. Therefore, don't use any random API ! 
+ Directory dir = new RAMDirectory(); + IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, null); + conf.SetMergeScheduler(new ReportingMergeScheduler()); + IndexWriter writer = new IndexWriter(dir, conf); + writer.AddDocument(new Document()); + writer.Commit(); // trigger flush + writer.AddDocument(new Document()); + writer.Commit(); // trigger flush + writer.ForceMerge(1); + writer.Dispose(); + dir.Dispose(); + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestTerm.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Index/TestTerm.cs b/src/Lucene.Net.Tests/Index/TestTerm.cs new file mode 100644 index 0000000..308dbba --- /dev/null +++ b/src/Lucene.Net.Tests/Index/TestTerm.cs @@ -0,0 +1,42 @@ +namespace Lucene.Net.Index +{ + using NUnit.Framework; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + + [TestFixture] + public class TestTerm : LuceneTestCase + { + [Test] + public virtual void TestEquals() + { + Term @base = new Term("same", "same"); + Term same = new Term("same", "same"); + Term differentField = new Term("different", "same"); + Term differentText = new Term("same", "different"); + const string differentType = "AString"; + Assert.AreEqual(@base, @base); + Assert.AreEqual(@base, same); + Assert.IsFalse(@base.Equals(differentField)); + Assert.IsFalse(@base.Equals(differentText)); + Assert.IsFalse(@base.Equals(differentType)); + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestTermVectorsFormat.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Index/TestTermVectorsFormat.cs b/src/Lucene.Net.Tests/Index/TestTermVectorsFormat.cs new file mode 100644 index 0000000..b189e41 --- /dev/null +++ b/src/Lucene.Net.Tests/Index/TestTermVectorsFormat.cs @@ -0,0 +1,119 @@ +using NUnit.Framework; + +namespace Lucene.Net.Index +{ + using System.Collections.Generic; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + using Codec = Lucene.Net.Codecs.Codec; + using Lucene3xCodec = Lucene.Net.Codecs.Lucene3x.Lucene3xCodec; + + /// <summary> + /// Tests with the default randomized codec. Not really redundant with + /// other specific instantiations since we want to test some test-only impls + /// like Asserting, as well as make it easy to write a codec and pass -Dtests.codec + /// </summary> + [TestFixture] + public class TestTermVectorsFormat : BaseTermVectorsFormatTestCase + { + protected override Codec Codec + { + get + { + return Codec.Default; + } + } + + protected override IEnumerable<Options> ValidOptions() + { +#pragma warning disable 612, 618 + if (Codec is Lucene3xCodec) +#pragma warning restore 612, 618 + { + // payloads are not supported on vectors in 3.x indexes + return ValidOptions(Options.NONE, Options.POSITIONS_AND_OFFSETS); + } + else + { + return base.ValidOptions(); + } + } + + [Test] + public override void TestMergeStability() + { + AssumeTrue("The MockRandom PF randomizes content on the fly, so we can't check it", false); + } + + + + #region BaseTermVectorsFormatTestCase + // LUCENENET NOTE: Tests in an abstract base class are not pulled into the correct + // context in Visual Studio. This fixes that with the minimum amount of code necessary + // to run them in the correct context without duplicating all of the tests. + + [Test] + // only one doc with vectors + public override void TestRareVectors() + { + base.TestRareVectors(); + } + + [Test] + public override void TestHighFreqs() + { + base.TestHighFreqs(); + } + + [Test] + public override void TestLotsOfFields() + { + base.TestLotsOfFields(); + } + + [Test] + // different options for the same field + public override void TestMixedOptions() + { + base.TestMixedOptions(); + } + + [Test] + public override void TestRandom() + { + base.TestRandom(); + } + + [Test] + public override void TestMerge() + { + base.TestMerge(); + } + + [Test] + // run random tests from different threads to make sure the per-thread clones + // don't share mutable data + public override void TestClone() + { + base.TestClone(); + } + + #endregion + } +} \ No newline at end of file
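Aside: the writer threads in TestStressNRT above lean on DirectoryReader reference counting when refreshing: DirectoryReader.OpenIfChanged returns null if nothing changed, otherwise a new reader carrying its own reference, and the caller decides which reader to release. A minimal sketch of that refresh pattern (illustrative only, not part of this commit; ReaderRefreshSketch and Refresh are made-up names, while OpenIfChanged and DecRef are the real APIs the test uses):

using Lucene.Net.Index;

public static class ReaderRefreshSketch
{
    // The caller owns one reference on `current` and receives one on the result.
    public static DirectoryReader Refresh(DirectoryReader current)
    {
        DirectoryReader newer = DirectoryReader.OpenIfChanged(current);
        if (newer == null)
        {
            return current; // nothing changed; keep the existing reader and its ref
        }
        current.DecRef(); // the new reader carries its own ref; drop the old one
        return newer;
    }
}

The test's version is more involved because several threads race to install the newest reader: it IncRef's the old reader under a lock before reopening, compares Version numbers under the lock, and DecRef's whichever reader loses the race.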