http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestCheckIndex.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Index/TestCheckIndex.cs b/src/Lucene.Net.Tests/Index/TestCheckIndex.cs new file mode 100644 index 0000000..7b6186e --- /dev/null +++ b/src/Lucene.Net.Tests/Index/TestCheckIndex.cs @@ -0,0 +1,129 @@ +using Lucene.Net.Documents; +using Lucene.Net.Support; +using System; +using System.Collections.Generic; +using System.Text; + +namespace Lucene.Net.Index +{ + using NUnit.Framework; + using System.IO; + using CannedTokenStream = Lucene.Net.Analysis.CannedTokenStream; + using Directory = Lucene.Net.Store.Directory; + using Document = Documents.Document; + using Field = Field; + using FieldType = FieldType; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer; + using TextField = TextField; + using Token = Lucene.Net.Analysis.Token; + + [TestFixture] + public class TestCheckIndex : LuceneTestCase + { + [Test] + public virtual void TestDeletedDocs() + { + Directory dir = NewDirectory(); + IndexWriter writer = new IndexWriter(dir, (IndexWriterConfig)NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())).SetMaxBufferedDocs(2)); + for (int i = 0; i < 19; i++) + { + Document doc = new Document(); + FieldType customType = new FieldType(TextField.TYPE_STORED); + customType.StoreTermVectors = true; + customType.StoreTermVectorPositions = true; + customType.StoreTermVectorOffsets = true; + doc.Add(NewField("field", "aaa" + i, customType)); + writer.AddDocument(doc); + } + writer.ForceMerge(1); + writer.Commit(); + writer.DeleteDocuments(new Term("field", "aaa5")); + writer.Dispose(); + + ByteArrayOutputStream bos = new ByteArrayOutputStream(1024); + CheckIndex checker = new CheckIndex(dir); + checker.InfoStream = new StreamWriter(bos, Encoding.UTF8); + if (VERBOSE) + { + checker.InfoStream = Console.Out; + } + CheckIndex.Status indexStatus = checker.DoCheckIndex(); + if (indexStatus.Clean == false) + { + Console.WriteLine("CheckIndex failed"); + checker.FlushInfoStream(); + Console.WriteLine(bos.ToString()); + Assert.Fail(); + } + + CheckIndex.Status.SegmentInfoStatus seg = indexStatus.SegmentInfos[0]; + Assert.IsTrue(seg.OpenReaderPassed); + + Assert.IsNotNull(seg.Diagnostics); + + Assert.IsNotNull(seg.FieldNormStatus); + Assert.IsNull(seg.FieldNormStatus.Error); + Assert.AreEqual(1, seg.FieldNormStatus.TotFields); + + Assert.IsNotNull(seg.TermIndexStatus); + Assert.IsNull(seg.TermIndexStatus.Error); + Assert.AreEqual(18, seg.TermIndexStatus.TermCount); + Assert.AreEqual(18, seg.TermIndexStatus.TotFreq); + 
Assert.AreEqual(18, seg.TermIndexStatus.TotPos); + + Assert.IsNotNull(seg.StoredFieldStatus); + Assert.IsNull(seg.StoredFieldStatus.Error); + Assert.AreEqual(18, seg.StoredFieldStatus.DocCount); + Assert.AreEqual(18, seg.StoredFieldStatus.TotFields); + + Assert.IsNotNull(seg.TermVectorStatus); + Assert.IsNull(seg.TermVectorStatus.Error); + Assert.AreEqual(18, seg.TermVectorStatus.DocCount); + Assert.AreEqual(18, seg.TermVectorStatus.TotVectors); + + Assert.IsTrue(seg.Diagnostics.Count > 0); + IList<string> onlySegments = new List<string>(); + onlySegments.Add("_0"); + + Assert.IsTrue(checker.DoCheckIndex(onlySegments).Clean == true); + dir.Dispose(); + } + + // LUCENE-4221: we have to let these thru, for now + [Test] + public virtual void TestBogusTermVectors() + { + Directory dir = NewDirectory(); + IndexWriter iw = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, null)); + Document doc = new Document(); + FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); + ft.StoreTermVectors = true; + ft.StoreTermVectorOffsets = true; + Field field = new Field("foo", "", ft); + field.SetTokenStream(new CannedTokenStream(new Token("bar", 5, 10), new Token("bar", 1, 4))); + doc.Add(field); + iw.AddDocument(doc); + iw.Dispose(); + dir.Dispose(); // checkindex + } + } +} \ No newline at end of file
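For orientation: TestDeletedDocs above drives the public CheckIndex surface end to end (construct against a Directory, point InfoStream somewhere, call DoCheckIndex, then inspect the per-segment Status). A minimal standalone sketch of the same calls, assuming a Lucene.NET 4.8-style API; the index path is hypothetical and must already contain an index:

using System;
using System.Collections.Generic;
using System.IO;
using Lucene.Net.Index;
using Lucene.Net.Store;

public static class CheckIndexSketch
{
    public static void Main()
    {
        // Hypothetical path to an existing index.
        using (Directory dir = FSDirectory.Open(new DirectoryInfo("/tmp/my-index")))
        {
            CheckIndex checker = new CheckIndex(dir);
            checker.InfoStream = Console.Out; // route diagnostics to stdout, as the VERBOSE branch does

            // Check every segment...
            CheckIndex.Status status = checker.DoCheckIndex();
            Console.WriteLine("clean: " + status.Clean + ", segments: " + status.SegmentInfos.Count);

            // ...or restrict the check to named segments, as the test does with "_0".
            IList<string> onlySegments = new List<string> { "_0" };
            Console.WriteLine("segment _0 clean: " + checker.DoCheckIndex(onlySegments).Clean);
        }
    }
}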
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestCodecHoldsOpenFiles.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Index/TestCodecHoldsOpenFiles.cs b/src/Lucene.Net.Tests/Index/TestCodecHoldsOpenFiles.cs new file mode 100644 index 0000000..66622af --- /dev/null +++ b/src/Lucene.Net.Tests/Index/TestCodecHoldsOpenFiles.cs @@ -0,0 +1,116 @@ +using Lucene.Net.Documents; +using NUnit.Framework; + +namespace Lucene.Net.Index +{ + using Attributes; + using System.IO; + using Directory = Lucene.Net.Store.Directory; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + using Document = Documents.Document; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + using TestUtil = Lucene.Net.Util.TestUtil; + using TextField = TextField; + + [TestFixture] + public class TestCodecHoldsOpenFiles : LuceneTestCase + { + [Test] + public virtual void Test() + { + Directory d = NewDirectory(); + RandomIndexWriter w = new RandomIndexWriter(Random(), d, Similarity, TimeZone); + int numDocs = AtLeast(100); + for (int i = 0; i < numDocs; i++) + { + Document doc = new Document(); + doc.Add(NewField("foo", "bar", TextField.TYPE_NOT_STORED)); + w.AddDocument(doc); + } + + IndexReader r = w.Reader; + w.Dispose(); + + foreach (string fileName in d.ListAll()) + { + try + { + d.DeleteFile(fileName); + } +#pragma warning disable 168 + catch (IOException ioe) +#pragma warning restore 168 + { + // ignore: this means codec (correctly) is holding + // the file open + } + } + + foreach (AtomicReaderContext cxt in r.Leaves) + { + TestUtil.CheckReader(cxt.Reader); + } + + r.Dispose(); + d.Dispose(); + } + + [Test, LuceneNetSpecific] // Apparently added to LUCENENET for debugging + public virtual void TestExposeUnclosedFiles() + { + Directory d = NewDirectory(); + RandomIndexWriter w = new RandomIndexWriter(Random(), d, Similarity, TimeZone); + //int numDocs = AtLeast(100); + int numDocs = 5; + for (int i = 0; i < numDocs; i++) + { + Document doc = new Document(); + doc.Add(NewField("foo", "bar", TextField.TYPE_NOT_STORED)); + w.AddDocument(doc); + } + + IndexReader r = w.Reader; + w.Dispose(); + + foreach (string fileName in d.ListAll()) + { + try + { + d.DeleteFile(fileName); + } +#pragma warning disable 168 + catch (IOException ioe) +#pragma warning restore 168 + { + // ignore: this means codec (correctly) is holding + // the file open + } + } + + foreach (AtomicReaderContext cxt in r.Leaves) + { + TestUtil.CheckReader(cxt.Reader); + } + + r.Dispose(); + d.Dispose(); + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestCodecs.cs 
---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Index/TestCodecs.cs b/src/Lucene.Net.Tests/Index/TestCodecs.cs new file mode 100644 index 0000000..e798da9 --- /dev/null +++ b/src/Lucene.Net.Tests/Index/TestCodecs.cs @@ -0,0 +1,933 @@ +using Lucene.Net.Codecs.MockSep; +using Lucene.Net.Documents; +using Lucene.Net.Search; +using Lucene.Net.Support; +using NUnit.Framework; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Threading; + +namespace Lucene.Net.Index +{ + using BytesRef = Lucene.Net.Util.BytesRef; + using Codec = Lucene.Net.Codecs.Codec; + using Constants = Lucene.Net.Util.Constants; + using Directory = Lucene.Net.Store.Directory; + using DocIdSetIterator = Lucene.Net.Search.DocIdSetIterator; + + //using MockSepPostingsFormat = Lucene.Net.Codecs.mocksep.MockSepPostingsFormat; + using Document = Documents.Document; + using FieldsConsumer = Lucene.Net.Codecs.FieldsConsumer; + using FieldsProducer = Lucene.Net.Codecs.FieldsProducer; + using FieldType = FieldType; + using IndexSearcher = Lucene.Net.Search.IndexSearcher; + using InfoStream = Lucene.Net.Util.InfoStream; + using Lucene3xCodec = Lucene.Net.Codecs.Lucene3x.Lucene3xCodec; + using Lucene40RWCodec = Lucene.Net.Codecs.Lucene40.Lucene40RWCodec; + using Lucene41RWCodec = Lucene.Net.Codecs.Lucene41.Lucene41RWCodec; + using Lucene42RWCodec = Lucene.Net.Codecs.Lucene42.Lucene42RWCodec; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + using MockAnalyzer = Lucene.Net.Analysis.MockAnalyzer; + using NumericDocValuesField = NumericDocValuesField; + using OpenBitSet = Lucene.Net.Util.OpenBitSet; + using PostingsConsumer = Lucene.Net.Codecs.PostingsConsumer; + using Query = Lucene.Net.Search.Query; + using ScoreDoc = Lucene.Net.Search.ScoreDoc; + using Store = Field.Store; + using StringField = StringField; + using TermsConsumer = Lucene.Net.Codecs.TermsConsumer; + using TermStats = Lucene.Net.Codecs.TermStats; + using TestUtil = Lucene.Net.Util.TestUtil; + + // TODO: test multiple codecs here? 
+ + // TODO + // - test across fields + // - fix this test to run once for all codecs + // - make more docs per term, to test > 1 level skipping + // - test all combinations of payloads/not and omitTF/not + // - test w/ different indexDivisor + // - test field where payload length rarely changes + // - 0-term fields + // - seek/skip to same term/doc i'm already on + // - mix in deleted docs + // - seek, skip beyond end -- assert returns false + // - seek, skip to things that don't exist -- ensure it + // goes to 1 before next one known to exist + // - skipTo(term) + // - skipTo(doc) + + [TestFixture] + public class TestCodecs : LuceneTestCase + { + private static string[] FieldNames = new string[] { "one", "two", "three", "four" }; + + private static int NUM_TEST_ITER; + private const int NUM_TEST_THREADS = 3; + private const int NUM_FIELDS = 4; + private const int NUM_TERMS_RAND = 50; // must be > 16 to test skipping + private const int DOC_FREQ_RAND = 500; // must be > 16 to test skipping + private const int TERM_DOC_FREQ_RAND = 20; + + [OneTimeSetUp] + public static void BeforeClass() + { + NUM_TEST_ITER = AtLeast(20); + } + + internal class FieldData : IComparable<FieldData> + { + private readonly TestCodecs OuterInstance; + + internal readonly FieldInfo FieldInfo; + internal readonly TermData[] Terms; + internal readonly bool OmitTF; + internal readonly bool StorePayloads; + + public FieldData(TestCodecs outerInstance, string name, FieldInfos.Builder fieldInfos, TermData[] terms, bool omitTF, bool storePayloads) + { + this.OuterInstance = outerInstance; + this.OmitTF = omitTF; + this.StorePayloads = storePayloads; + // TODO: change this test to use all three + FieldInfo = fieldInfos.AddOrUpdate(name, new IndexableFieldTypeAnonymousInnerClassHelper(this, omitTF)); + if (storePayloads) + { + FieldInfo.SetStorePayloads(); + } + this.Terms = terms; + for (int i = 0; i < terms.Length; i++) + { + terms[i].Field = this; + } + + Array.Sort(terms); + } + + private class IndexableFieldTypeAnonymousInnerClassHelper : IIndexableFieldType + { + private readonly FieldData OuterInstance; + + private bool OmitTF; + + public IndexableFieldTypeAnonymousInnerClassHelper(FieldData outerInstance, bool omitTF) + { + this.OuterInstance = outerInstance; + this.OmitTF = omitTF; + } + + public bool IsIndexed + { + get { return true; } + set { } + } + + public bool IsStored + { + get { return false; } + set { } + } + + public bool IsTokenized + { + get { return false; } + set { } + } + + public bool StoreTermVectors + { + get { return false; } + set { } + } + + public bool StoreTermVectorOffsets + { + get { return false; } + set { } + } + + public bool StoreTermVectorPositions + { + get { return false; } + set { } + } + + public bool StoreTermVectorPayloads + { + get { return false; } + set { } + } + + public bool OmitNorms + { + get { return false; } + set { } + } + + public IndexOptions? IndexOptions + { + get { return OmitTF ? Index.IndexOptions.DOCS_ONLY : Index.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; } + set { } + } + + public NumericType? NumericType + { + get { throw new NotImplementedException(); } + set { } + } + + public DocValuesType? 
DocValueType + { + get { return null; } + set { } + } + } + + public int CompareTo(FieldData other) + { + return FieldInfo.Name.CompareTo(other.FieldInfo.Name); + } + + public virtual void Write(FieldsConsumer consumer) + { + Array.Sort(Terms); + TermsConsumer termsConsumer = consumer.AddField(FieldInfo); + long sumTotalTermCount = 0; + long sumDF = 0; + OpenBitSet visitedDocs = new OpenBitSet(); + foreach (TermData term in Terms) + { + for (int i = 0; i < term.Docs.Length; i++) + { + visitedDocs.Set(term.Docs[i]); + } + sumDF += term.Docs.Length; + sumTotalTermCount += term.Write(termsConsumer); + } + termsConsumer.Finish(OmitTF ? -1 : sumTotalTermCount, sumDF, (int)visitedDocs.Cardinality()); + } + } + + internal class PositionData + { + private readonly TestCodecs OuterInstance; + + internal int Pos; + internal BytesRef Payload; + + internal PositionData(TestCodecs outerInstance, int pos, BytesRef payload) + { + this.OuterInstance = outerInstance; + this.Pos = pos; + this.Payload = payload; + } + } + + internal class TermData : IComparable<TermData> + { + private readonly TestCodecs OuterInstance; + + internal string Text2; + internal readonly BytesRef Text; + internal int[] Docs; + internal PositionData[][] Positions; + internal FieldData Field; + + public TermData(TestCodecs outerInstance, string text, int[] docs, PositionData[][] positions) + { + this.OuterInstance = outerInstance; + this.Text = new BytesRef(text); + this.Text2 = text; + this.Docs = docs; + this.Positions = positions; + } + + public virtual int CompareTo(TermData o) + { + return Text.CompareTo(o.Text); + } + + public virtual long Write(TermsConsumer termsConsumer) + { + PostingsConsumer postingsConsumer = termsConsumer.StartTerm(Text); + long totTF = 0; + for (int i = 0; i < Docs.Length; i++) + { + int termDocFreq; + if (Field.OmitTF) + { + termDocFreq = -1; + } + else + { + termDocFreq = Positions[i].Length; + } + postingsConsumer.StartDoc(Docs[i], termDocFreq); + if (!Field.OmitTF) + { + totTF += Positions[i].Length; + for (int j = 0; j < Positions[i].Length; j++) + { + PositionData pos = Positions[i][j]; + postingsConsumer.AddPosition(pos.Pos, pos.Payload, -1, -1); + } + } + postingsConsumer.FinishDoc(); + } + termsConsumer.FinishTerm(Text, new TermStats(Docs.Length, Field.OmitTF ? 
-1 : totTF)); + return totTF; + } + } + + private const string SEGMENT = "0"; + + internal virtual TermData[] MakeRandomTerms(bool omitTF, bool storePayloads) + { + int numTerms = 1 + Random().Next(NUM_TERMS_RAND); + //final int numTerms = 2; + TermData[] terms = new TermData[numTerms]; + + HashSet<string> termsSeen = new HashSet<string>(); + + for (int i = 0; i < numTerms; i++) + { + // Make term text + string text2; + while (true) + { + text2 = TestUtil.RandomUnicodeString(Random()); + if (!termsSeen.Contains(text2) && !text2.EndsWith(".")) + { + termsSeen.Add(text2); + break; + } + } + + int docFreq = 1 + Random().Next(DOC_FREQ_RAND); + int[] docs = new int[docFreq]; + PositionData[][] positions; + + if (!omitTF) + { + positions = new PositionData[docFreq][]; + } + else + { + positions = null; + } + + int docID = 0; + for (int j = 0; j < docFreq; j++) + { + docID += TestUtil.NextInt(Random(), 1, 10); + docs[j] = docID; + + if (!omitTF) + { + int termFreq = 1 + Random().Next(TERM_DOC_FREQ_RAND); + positions[j] = new PositionData[termFreq]; + int position = 0; + for (int k = 0; k < termFreq; k++) + { + position += TestUtil.NextInt(Random(), 1, 10); + + BytesRef payload; + if (storePayloads && Random().Next(4) == 0) + { + var bytes = new byte[1 + Random().Next(5)]; + for (int l = 0; l < bytes.Length; l++) + { + bytes[l] = (byte)Random().Next(255); + } + payload = new BytesRef(bytes); + } + else + { + payload = null; + } + + positions[j][k] = new PositionData(this, position, payload); + } + } + } + + terms[i] = new TermData(this, text2, docs, positions); + } + + return terms; + } + + [Test] + public virtual void TestFixedPostings() + { + const int NUM_TERMS = 100; + TermData[] terms = new TermData[NUM_TERMS]; + for (int i = 0; i < NUM_TERMS; i++) + { + int[] docs = new int[] { i }; + string text = Convert.ToString(i); + terms[i] = new TermData(this, text, docs, null); + } + + FieldInfos.Builder builder = new FieldInfos.Builder(); + + FieldData field = new FieldData(this, "field", builder, terms, true, false); + FieldData[] fields = new FieldData[] { field }; + FieldInfos fieldInfos = builder.Finish(); + Directory dir = NewDirectory(); + this.Write(fieldInfos, dir, fields, true); + Codec codec = Codec.Default; + SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null); + + FieldsProducer reader = codec.PostingsFormat.FieldsProducer(new SegmentReadState(dir, si, fieldInfos, NewIOContext(Random()), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR)); + + IEnumerator<string> fieldsEnum = reader.GetEnumerator(); + fieldsEnum.MoveNext(); + string fieldName = fieldsEnum.Current; + Assert.IsNotNull(fieldName); + Terms terms2 = reader.GetTerms(fieldName); + Assert.IsNotNull(terms2); + + TermsEnum termsEnum = terms2.GetIterator(null); + + DocsEnum docsEnum = null; + for (int i = 0; i < NUM_TERMS; i++) + { + BytesRef term = termsEnum.Next(); + Assert.IsNotNull(term); + Assert.AreEqual(terms[i].Text2, term.Utf8ToString()); + + // do this twice to stress test the codec's reuse, ie, + // make sure it properly fully resets (rewinds) its + // internal state: + for (int iter = 0; iter < 2; iter++) + { + docsEnum = TestUtil.Docs(Random(), termsEnum, null, docsEnum, DocsEnum.FLAG_NONE); + Assert.AreEqual(terms[i].Docs[0], docsEnum.NextDoc()); + Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc()); + } + } + Assert.IsNull(termsEnum.Next()); + + for (int i = 0; i < NUM_TERMS; i++) + { + Assert.AreEqual(termsEnum.SeekCeil(new BytesRef(terms[i].Text2)), 
TermsEnum.SeekStatus.FOUND); + } + + Assert.IsFalse(fieldsEnum.MoveNext()); + reader.Dispose(); + dir.Dispose(); + } + + [Test] + public virtual void TestRandomPostings() + { + FieldInfos.Builder builder = new FieldInfos.Builder(); + + FieldData[] fields = new FieldData[NUM_FIELDS]; + for (int i = 0; i < NUM_FIELDS; i++) + { + bool omitTF = 0 == (i % 3); + bool storePayloads = 1 == (i % 3); + fields[i] = new FieldData(this, FieldNames[i], builder, this.MakeRandomTerms(omitTF, storePayloads), omitTF, storePayloads); + } + + Directory dir = NewDirectory(); + FieldInfos fieldInfos = builder.Finish(); + + if (VERBOSE) + { + Console.WriteLine("TEST: now write postings"); + } + + this.Write(fieldInfos, dir, fields, false); + Codec codec = Codec.Default; + SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null); + + if (VERBOSE) + { + Console.WriteLine("TEST: now read postings"); + } + FieldsProducer terms = codec.PostingsFormat.FieldsProducer(new SegmentReadState(dir, si, fieldInfos, NewIOContext(Random()), DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR)); + + Verify[] threads = new Verify[NUM_TEST_THREADS - 1]; + for (int i = 0; i < NUM_TEST_THREADS - 1; i++) + { + threads[i] = new Verify(this, si, fields, terms); + threads[i].SetDaemon(true); + threads[i].Start(); + } + + (new Verify(this, si, fields, terms)).Run(); + + for (int i = 0; i < NUM_TEST_THREADS - 1; i++) + { + threads[i].Join(); + Debug.Assert(!threads[i].Failed); + } + + terms.Dispose(); + dir.Dispose(); + } + + [Test] + public virtual void TestSepPositionAfterMerge() + { + Directory dir = NewDirectory(); + IndexWriterConfig config = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); + config.SetMergePolicy(NewLogMergePolicy()); + config.SetCodec(TestUtil.AlwaysPostingsFormat(new MockSepPostingsFormat())); + IndexWriter writer = new IndexWriter(dir, config); + + try + { + PhraseQuery pq = new PhraseQuery(); + pq.Add(new Term("content", "bbb")); + pq.Add(new Term("content", "ccc")); + + Document doc = new Document(); + FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); + customType.OmitNorms = true; + doc.Add(NewField("content", "aaa bbb ccc ddd", customType)); + + // add document and force commit for creating a first segment + writer.AddDocument(doc); + writer.Commit(); + + ScoreDoc[] results = this.Search(writer, pq, 5); + Assert.AreEqual(1, results.Length); + Assert.AreEqual(0, results[0].Doc); + + // add document and force commit for creating a second segment + writer.AddDocument(doc); + writer.Commit(); + + // at this point, there should be at least two segments + results = this.Search(writer, pq, 5); + Assert.AreEqual(2, results.Length); + Assert.AreEqual(0, results[0].Doc); + + writer.ForceMerge(1); + + // optimise to merge the segments. 
+ results = this.Search(writer, pq, 5); + Assert.AreEqual(2, results.Length); + Assert.AreEqual(0, results[0].Doc); + } + finally + { + writer.Dispose(); + dir.Dispose(); + } + } + + private ScoreDoc[] Search(IndexWriter writer, Query q, int n) + { + IndexReader reader = writer.Reader; + IndexSearcher searcher = NewSearcher(reader); + try + { + return searcher.Search(q, null, n).ScoreDocs; + } + finally + { + reader.Dispose(); + } + } + + private class Verify : ThreadClass + { + private readonly TestCodecs OuterInstance; + + internal readonly Fields TermsDict; + internal readonly FieldData[] Fields; + internal readonly SegmentInfo Si; + internal volatile bool Failed; + + internal Verify(TestCodecs outerInstance, SegmentInfo si, FieldData[] fields, Fields termsDict) + { + this.OuterInstance = outerInstance; + this.Fields = fields; + this.TermsDict = termsDict; + this.Si = si; + } + + public override void Run() + { + try + { + this._run(); + } + catch (Exception t) + { + Failed = true; + throw new Exception(t.Message, t); + } + } + + internal virtual void VerifyDocs(int[] docs, PositionData[][] positions, DocsEnum docsEnum, bool doPos) + { + for (int i = 0; i < docs.Length; i++) + { + int doc = docsEnum.NextDoc(); + Assert.IsTrue(doc != DocIdSetIterator.NO_MORE_DOCS); + Assert.AreEqual(docs[i], doc); + if (doPos) + { + this.VerifyPositions(positions[i], ((DocsAndPositionsEnum)docsEnum)); + } + } + Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docsEnum.NextDoc()); + } + + internal sbyte[] Data = new sbyte[10]; + + internal virtual void VerifyPositions(PositionData[] positions, DocsAndPositionsEnum posEnum) + { + for (int i = 0; i < positions.Length; i++) + { + int pos = posEnum.NextPosition(); + Assert.AreEqual(positions[i].Pos, pos); + if (positions[i].Payload != null) + { + Assert.IsNotNull(posEnum.GetPayload()); + if (Random().Next(3) < 2) + { + // Verify the payload bytes + BytesRef otherPayload = posEnum.GetPayload(); + Assert.IsTrue(positions[i].Payload.Equals(otherPayload), "expected=" + positions[i].Payload.ToString() + " got=" + otherPayload.ToString()); + } + } + else + { + Assert.IsNull(posEnum.GetPayload()); + } + } + } + + public virtual void _run() + { + for (int iter = 0; iter < NUM_TEST_ITER; iter++) + { + FieldData field = Fields[Random().Next(Fields.Length)]; + TermsEnum termsEnum = TermsDict.GetTerms(field.FieldInfo.Name).GetIterator(null); +#pragma warning disable 612, 618 + if (Si.Codec is Lucene3xCodec) +#pragma warning restore 612, 618 + { + // code below expects unicode sort order + continue; + } + + int upto = 0; + // Test straight enum of the terms: + while (true) + { + BytesRef term = termsEnum.Next(); + if (term == null) + { + break; + } + BytesRef expected = new BytesRef(field.Terms[upto++].Text2); + Assert.IsTrue(expected.BytesEquals(term), "expected=" + expected + " vs actual " + term); + } + Assert.AreEqual(upto, field.Terms.Length); + + // Test random seek: + TermData term2 = field.Terms[Random().Next(field.Terms.Length)]; + TermsEnum.SeekStatus status = termsEnum.SeekCeil(new BytesRef(term2.Text2)); + Assert.AreEqual(status, TermsEnum.SeekStatus.FOUND); + Assert.AreEqual(term2.Docs.Length, termsEnum.DocFreq); + if (field.OmitTF) + { + this.VerifyDocs(term2.Docs, term2.Positions, TestUtil.Docs(Random(), termsEnum, null, null, DocsEnum.FLAG_NONE), false); + } + else + { + this.VerifyDocs(term2.Docs, term2.Positions, termsEnum.DocsAndPositions(null, null), true); + } + + // Test random seek by ord: + int idx = Random().Next(field.Terms.Length); + term2 = 
field.Terms[idx]; + bool success = false; + try + { + termsEnum.SeekExact(idx); + success = true; + } +#pragma warning disable 168 + catch (System.NotSupportedException uoe) +#pragma warning restore 168 + { + // ok -- skip it + } + if (success) + { + Assert.AreEqual(status, TermsEnum.SeekStatus.FOUND); + Assert.IsTrue(termsEnum.Term.BytesEquals(new BytesRef(term2.Text2))); + Assert.AreEqual(term2.Docs.Length, termsEnum.DocFreq); + if (field.OmitTF) + { + this.VerifyDocs(term2.Docs, term2.Positions, TestUtil.Docs(Random(), termsEnum, null, null, DocsEnum.FLAG_NONE), false); + } + else + { + this.VerifyDocs(term2.Docs, term2.Positions, termsEnum.DocsAndPositions(null, null), true); + } + } + + // Test seek to non-existent terms: + if (VERBOSE) + { + Console.WriteLine("TEST: seek non-exist terms"); + } + for (int i = 0; i < 100; i++) + { + string text2 = TestUtil.RandomUnicodeString(Random()) + "."; + status = termsEnum.SeekCeil(new BytesRef(text2)); + Assert.IsTrue(status == TermsEnum.SeekStatus.NOT_FOUND || status == TermsEnum.SeekStatus.END); + } + + // Seek to each term, backwards: + if (VERBOSE) + { + Console.WriteLine("TEST: seek terms backwards"); + } + for (int i = field.Terms.Length - 1; i >= 0; i--) + { + Assert.AreEqual(TermsEnum.SeekStatus.FOUND, termsEnum.SeekCeil(new BytesRef(field.Terms[i].Text2)), Thread.CurrentThread.Name + ": field=" + field.FieldInfo.Name + " term=" + field.Terms[i].Text2); + Assert.AreEqual(field.Terms[i].Docs.Length, termsEnum.DocFreq); + } + + // Seek to each term by ord, backwards + for (int i = field.Terms.Length - 1; i >= 0; i--) + { + try + { + termsEnum.SeekExact(i); + Assert.AreEqual(field.Terms[i].Docs.Length, termsEnum.DocFreq); + Assert.IsTrue(termsEnum.Term.BytesEquals(new BytesRef(field.Terms[i].Text2))); + } +#pragma warning disable 168 + catch (System.NotSupportedException uoe) +#pragma warning restore 168 + { + } + } + + // Seek to non-existent empty-string term + status = termsEnum.SeekCeil(new BytesRef("")); + Assert.IsNotNull(status); + //Assert.AreEqual(TermsEnum.SeekStatus.NOT_FOUND, status); + + // Make sure we're now pointing to first term + Assert.IsTrue(termsEnum.Term.BytesEquals(new BytesRef(field.Terms[0].Text2))); + + // Test docs enum + termsEnum.SeekCeil(new BytesRef("")); + upto = 0; + do + { + term2 = field.Terms[upto]; + if (Random().Next(3) == 1) + { + DocsEnum docs; + DocsEnum docsAndFreqs; + DocsAndPositionsEnum postings; + if (!field.OmitTF) + { + postings = termsEnum.DocsAndPositions(null, null); + if (postings != null) + { + docs = docsAndFreqs = postings; + } + else + { + docs = docsAndFreqs = TestUtil.Docs(Random(), termsEnum, null, null, DocsEnum.FLAG_FREQS); + } + } + else + { + postings = null; + docsAndFreqs = null; + docs = TestUtil.Docs(Random(), termsEnum, null, null, DocsEnum.FLAG_NONE); + } + Assert.IsNotNull(docs); + int upto2 = -1; + bool ended = false; + while (upto2 < term2.Docs.Length - 1) + { + // Maybe skip: + int left = term2.Docs.Length - upto2; + int doc; + if (Random().Next(3) == 1 && left >= 1) + { + int inc = 1 + Random().Next(left - 1); + upto2 += inc; + if (Random().Next(2) == 1) + { + doc = docs.Advance(term2.Docs[upto2]); + Assert.AreEqual(term2.Docs[upto2], doc); + } + else + { + doc = docs.Advance(1 + term2.Docs[upto2]); + if (doc == DocIdSetIterator.NO_MORE_DOCS) + { + // skipped past last doc + Debug.Assert(upto2 == term2.Docs.Length - 1); + ended = true; + break; + } + else + { + // skipped to next doc + Debug.Assert(upto2 < term2.Docs.Length - 1); + if (doc >= term2.Docs[1 + upto2]) + 
{ + upto2++; + } + } + } + } + else + { + doc = docs.NextDoc(); + Assert.IsTrue(doc != -1); + upto2++; + } + Assert.AreEqual(term2.Docs[upto2], doc); + if (!field.OmitTF) + { + Assert.AreEqual(term2.Positions[upto2].Length, postings.Freq); + if (Random().Next(2) == 1) + { + this.VerifyPositions(term2.Positions[upto2], postings); + } + } + } + + if (!ended) + { + Assert.AreEqual(DocIdSetIterator.NO_MORE_DOCS, docs.NextDoc()); + } + } + upto++; + } while (termsEnum.Next() != null); + + Assert.AreEqual(upto, field.Terms.Length); + } + } + } + + private void Write(FieldInfos fieldInfos, Directory dir, FieldData[] fields, bool allowPreFlex) + { + int termIndexInterval = TestUtil.NextInt(Random(), 13, 27); + Codec codec = Codec.Default; + SegmentInfo si = new SegmentInfo(dir, Constants.LUCENE_MAIN_VERSION, SEGMENT, 10000, false, codec, null); + SegmentWriteState state = new SegmentWriteState(InfoStream.Default, dir, si, fieldInfos, termIndexInterval, null, NewIOContext(Random())); + + FieldsConsumer consumer = codec.PostingsFormat.FieldsConsumer(state); + Array.Sort(fields); + foreach (FieldData field in fields) + { +#pragma warning disable 612, 618 + if (!allowPreFlex && codec is Lucene3xCodec) +#pragma warning restore 612, 618 + { + // code below expects unicode sort order + continue; + } + field.Write(consumer); + } + consumer.Dispose(); + } + + [Test] + public virtual void TestDocsOnlyFreq() + { + // tests that when fields are indexed with DOCS_ONLY, the Codec + // returns 1 in docsEnum.Freq() + Directory dir = NewDirectory(); + Random random = Random(); + IndexWriter writer = new IndexWriter(dir, NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); + // we don't need many documents to assert this, but don't use one document either + int numDocs = AtLeast(random, 50); + for (int i = 0; i < numDocs; i++) + { + Document doc = new Document(); + doc.Add(new StringField("f", "doc", Store.NO)); + writer.AddDocument(doc); + } + writer.Dispose(); + + Term term = new Term("f", new BytesRef("doc")); + DirectoryReader reader = DirectoryReader.Open(dir); + foreach (AtomicReaderContext ctx in reader.Leaves) + { + DocsEnum de = ((AtomicReader)ctx.Reader).TermDocsEnum(term); + while (de.NextDoc() != DocIdSetIterator.NO_MORE_DOCS) + { + Assert.AreEqual(1, de.Freq, "wrong freq for doc " + de.DocID); + } + } + reader.Dispose(); + + dir.Dispose(); + } + + [Test] + public virtual void TestDisableImpersonation() + { + Codec[] oldCodecs = new Codec[] { new Lucene40RWCodec(OLD_FORMAT_IMPERSONATION_IS_ACTIVE), new Lucene41RWCodec(OLD_FORMAT_IMPERSONATION_IS_ACTIVE), new Lucene42RWCodec(OLD_FORMAT_IMPERSONATION_IS_ACTIVE) }; + Directory dir = NewDirectory(); + IndexWriterConfig conf = NewIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(Random())); + conf.SetCodec(oldCodecs[Random().Next(oldCodecs.Length)]); + IndexWriter writer = new IndexWriter(dir, conf); + + Document doc = new Document(); + doc.Add(new StringField("f", "bar", Store.YES)); + doc.Add(new NumericDocValuesField("n", 18L)); + writer.AddDocument(doc); + + OLD_FORMAT_IMPERSONATION_IS_ACTIVE = false; + try + { + writer.Dispose(); + Assert.Fail("should not have succeeded to impersonate an old format!"); + } +#pragma warning disable 168 + catch (System.NotSupportedException e) +#pragma warning restore 168 + { + writer.Rollback(); + } + finally + { + OLD_FORMAT_IMPERSONATION_IS_ACTIVE = true; + } + + dir.Dispose(); + } + } +} \ No newline at end of file 
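TestDocsOnlyFreq above pins down a small but easy-to-break codec contract: for fields indexed as DOCS_ONLY (which StringField is), the postings enum must report Freq == 1 rather than an undefined value. A self-contained sketch of that contract, assuming the 4.8-style public API (LuceneVersion, RAMDirectory, StandardAnalyzer) in place of the test framework's NewDirectory/MockAnalyzer helpers:

using System;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Lucene.Net.Util;

public static class DocsOnlyFreqSketch
{
    public static void Main()
    {
        using (Directory dir = new RAMDirectory())
        {
            var conf = new IndexWriterConfig(LuceneVersion.LUCENE_48,
                new StandardAnalyzer(LuceneVersion.LUCENE_48));
            using (var writer = new IndexWriter(dir, conf))
            {
                for (int i = 0; i < 10; i++)
                {
                    var doc = new Document();
                    // StringField is indexed DOCS_ONLY: no freqs or positions are stored.
                    doc.Add(new StringField("f", "doc", Field.Store.NO));
                    writer.AddDocument(doc);
                }
            }

            var term = new Term("f", new BytesRef("doc"));
            using (DirectoryReader reader = DirectoryReader.Open(dir))
            {
                foreach (AtomicReaderContext ctx in reader.Leaves)
                {
                    DocsEnum de = ((AtomicReader)ctx.Reader).TermDocsEnum(term);
                    while (de.NextDoc() != DocIdSetIterator.NO_MORE_DOCS)
                    {
                        // The codec must synthesize a frequency of 1 here.
                        Console.WriteLine("doc " + de.DocID + " freq " + de.Freq);
                    }
                }
            }
        }
    }
}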
http://git-wip-us.apache.org/repos/asf/lucenenet/blob/96822396/src/Lucene.Net.Tests/Index/TestCompoundFile.cs ---------------------------------------------------------------------- diff --git a/src/Lucene.Net.Tests/Index/TestCompoundFile.cs b/src/Lucene.Net.Tests/Index/TestCompoundFile.cs new file mode 100644 index 0000000..8452701 --- /dev/null +++ b/src/Lucene.Net.Tests/Index/TestCompoundFile.cs @@ -0,0 +1,917 @@ +using Lucene.Net.Documents; +using Lucene.Net.Store; +using System; + +namespace Lucene.Net.Index +{ + using NUnit.Framework; + using System.IO; + using CompoundFileDirectory = Lucene.Net.Store.CompoundFileDirectory; + using Directory = Lucene.Net.Store.Directory; + + /* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + using Document = Documents.Document; + using Field = Field; + using IndexInput = Lucene.Net.Store.IndexInput; + using IndexOutput = Lucene.Net.Store.IndexOutput; + using IOContext = Lucene.Net.Store.IOContext; + using IOUtils = Lucene.Net.Util.IOUtils; + using LuceneTestCase = Lucene.Net.Util.LuceneTestCase; + using SimpleFSDirectory = Lucene.Net.Store.SimpleFSDirectory; + using TestUtil = Lucene.Net.Util.TestUtil; + + [TestFixture] + public class TestCompoundFile : LuceneTestCase + { + private Directory Dir; + + [SetUp] + public override void SetUp() + { + base.SetUp(); + DirectoryInfo file = CreateTempDir("testIndex"); + // use a simple FSDir here, to be sure to have SimpleFSInputs + Dir = new SimpleFSDirectory(file, null); + } + + [TearDown] + public override void TearDown() + { + Dir.Dispose(); + base.TearDown(); + } + + /// <summary> + /// Creates a file of the specified size with random data. </summary> + private void CreateRandomFile(Directory dir, string name, int size) + { + IndexOutput os = dir.CreateOutput(name, NewIOContext(Random())); + for (int i = 0; i < size; i++) + { + var b = unchecked((sbyte)(new Random(1).NextDouble() * 256)); + os.WriteByte((byte)b); + } + os.Dispose(); + } + + /// <summary> + /// Creates a file of the specified size with sequential data. The first + /// byte is written as the start byte provided. All subsequent bytes are + /// computed as start + offset where offset is the number of the byte. 
+ /// </summary> + private void CreateSequenceFile(Directory dir, string name, sbyte start, int size) + { + IndexOutput os = dir.CreateOutput(name, NewIOContext(Random())); + for (int i = 0; i < size; i++) + { + os.WriteByte((byte)start); + start++; + } + os.Dispose(); + } + + private void AssertSameStreams(string msg, IndexInput expected, IndexInput test) + { + Assert.IsNotNull(expected, msg + " null expected"); + Assert.IsNotNull(test, msg + " null test"); + Assert.AreEqual(expected.Length, test.Length, msg + " length"); + Assert.AreEqual(expected.FilePointer, test.FilePointer, msg + " position"); + + var expectedBuffer = new byte[512]; + var testBuffer = new byte[expectedBuffer.Length]; + + long remainder = expected.Length - expected.FilePointer; + while (remainder > 0) + { + int readLen = (int)Math.Min(remainder, expectedBuffer.Length); + expected.ReadBytes(expectedBuffer, 0, readLen); + test.ReadBytes(testBuffer, 0, readLen); + AssertEqualArrays(msg + ", remainder " + remainder, expectedBuffer, testBuffer, 0, readLen); + remainder -= readLen; + } + } + + private void AssertSameStreams(string msg, IndexInput expected, IndexInput actual, long seekTo) + { + if (seekTo >= 0 && seekTo < expected.Length) + { + expected.Seek(seekTo); + actual.Seek(seekTo); + AssertSameStreams(msg + ", seek(mid)", expected, actual); + } + } + + private void AssertSameSeekBehavior(string msg, IndexInput expected, IndexInput actual) + { + // seek to 0 + long point = 0; + AssertSameStreams(msg + ", seek(0)", expected, actual, point); + + // seek to middle + point = expected.Length / 2L; + AssertSameStreams(msg + ", seek(mid)", expected, actual, point); + + // seek to end - 2 + point = expected.Length - 2; + AssertSameStreams(msg + ", seek(end-2)", expected, actual, point); + + // seek to end - 1 + point = expected.Length - 1; + AssertSameStreams(msg + ", seek(end-1)", expected, actual, point); + + // seek to the end + point = expected.Length; + AssertSameStreams(msg + ", seek(end)", expected, actual, point); + + // seek past end + point = expected.Length + 1; + AssertSameStreams(msg + ", seek(end+1)", expected, actual, point); + } + + private void AssertEqualArrays(string msg, byte[] expected, byte[] test, int start, int len) + { + Assert.IsNotNull(expected, msg + " null expected"); + Assert.IsNotNull(test, msg + " null test"); + + for (int i = start; i < len; i++) + { + Assert.AreEqual(expected[i], test[i], msg + " " + i); + } + } + + // =========================================================== + // Tests of the basic CompoundFile functionality + // =========================================================== + + /// <summary> + /// this test creates compound file based on a single file. + /// Files of different sizes are tested: 0, 1, 10, 100 bytes. 
+ /// </summary> + [Test] + public virtual void TestSingleFile() + { + int[] data = new int[] { 0, 1, 10, 100 }; + for (int i = 0; i < data.Length; i++) + { + string name = "t" + data[i]; + CreateSequenceFile(Dir, name, (sbyte)0, data[i]); + CompoundFileDirectory csw = new CompoundFileDirectory(Dir, name + ".cfs", NewIOContext(Random()), true); + Dir.Copy(csw, name, name, NewIOContext(Random())); + csw.Dispose(); + + CompoundFileDirectory csr = new CompoundFileDirectory(Dir, name + ".cfs", NewIOContext(Random()), false); + IndexInput expected = Dir.OpenInput(name, NewIOContext(Random())); + IndexInput actual = csr.OpenInput(name, NewIOContext(Random())); + AssertSameStreams(name, expected, actual); + AssertSameSeekBehavior(name, expected, actual); + expected.Dispose(); + actual.Dispose(); + csr.Dispose(); + } + } + + /// <summary> + /// this test creates compound file based on two files. + /// + /// </summary> + [Test] + public virtual void TestTwoFiles() + { + CreateSequenceFile(Dir, "d1", (sbyte)0, 15); + CreateSequenceFile(Dir, "d2", (sbyte)0, 114); + + CompoundFileDirectory csw = new CompoundFileDirectory(Dir, "d.cfs", NewIOContext(Random()), true); + Dir.Copy(csw, "d1", "d1", NewIOContext(Random())); + Dir.Copy(csw, "d2", "d2", NewIOContext(Random())); + csw.Dispose(); + + CompoundFileDirectory csr = new CompoundFileDirectory(Dir, "d.cfs", NewIOContext(Random()), false); + IndexInput expected = Dir.OpenInput("d1", NewIOContext(Random())); + IndexInput actual = csr.OpenInput("d1", NewIOContext(Random())); + AssertSameStreams("d1", expected, actual); + AssertSameSeekBehavior("d1", expected, actual); + expected.Dispose(); + actual.Dispose(); + + expected = Dir.OpenInput("d2", NewIOContext(Random())); + actual = csr.OpenInput("d2", NewIOContext(Random())); + AssertSameStreams("d2", expected, actual); + AssertSameSeekBehavior("d2", expected, actual); + expected.Dispose(); + actual.Dispose(); + csr.Dispose(); + } + + /// <summary> + /// this test creates a compound file based on a large number of files of + /// various length. The file content is generated randomly. The sizes range + /// from 0 to 1Mb. Some of the sizes are selected to test the buffering + /// logic in the file reading code. For this the chunk variable is set to + /// the length of the buffer used internally by the compound file logic. 
+ /// </summary> + [Test] + public virtual void TestRandomFiles() + { + // Setup the test segment + string segment = "test"; + int chunk = 1024; // internal buffer size used by the stream + CreateRandomFile(Dir, segment + ".zero", 0); + CreateRandomFile(Dir, segment + ".one", 1); + CreateRandomFile(Dir, segment + ".ten", 10); + CreateRandomFile(Dir, segment + ".hundred", 100); + CreateRandomFile(Dir, segment + ".big1", chunk); + CreateRandomFile(Dir, segment + ".big2", chunk - 1); + CreateRandomFile(Dir, segment + ".big3", chunk + 1); + CreateRandomFile(Dir, segment + ".big4", 3 * chunk); + CreateRandomFile(Dir, segment + ".big5", 3 * chunk - 1); + CreateRandomFile(Dir, segment + ".big6", 3 * chunk + 1); + CreateRandomFile(Dir, segment + ".big7", 1000 * chunk); + + // Setup extraneous files + CreateRandomFile(Dir, "onetwothree", 100); + CreateRandomFile(Dir, segment + ".notIn", 50); + CreateRandomFile(Dir, segment + ".notIn2", 51); + + // Now test + CompoundFileDirectory csw = new CompoundFileDirectory(Dir, "test.cfs", NewIOContext(Random()), true); + string[] data = new string[] { ".zero", ".one", ".ten", ".hundred", ".big1", ".big2", ".big3", ".big4", ".big5", ".big6", ".big7" }; + for (int i = 0; i < data.Length; i++) + { + string fileName = segment + data[i]; + Dir.Copy(csw, fileName, fileName, NewIOContext(Random())); + } + csw.Dispose(); + + CompoundFileDirectory csr = new CompoundFileDirectory(Dir, "test.cfs", NewIOContext(Random()), false); + for (int i = 0; i < data.Length; i++) + { + IndexInput check = Dir.OpenInput(segment + data[i], NewIOContext(Random())); + IndexInput test = csr.OpenInput(segment + data[i], NewIOContext(Random())); + AssertSameStreams(data[i], check, test); + AssertSameSeekBehavior(data[i], check, test); + test.Dispose(); + check.Dispose(); + } + csr.Dispose(); + } + + /// <summary> + /// Setup a larger compound file with a number of components, each of + /// which is a sequential file (so that we can easily tell that we are + /// reading in the right byte). The methods sets up 20 files - f0 to f19, + /// the size of each file is 1000 bytes. + /// </summary> + private void SetUp_2() + { + CompoundFileDirectory cw = new CompoundFileDirectory(Dir, "f.comp", NewIOContext(Random()), true); + for (int i = 0; i < 20; i++) + { + CreateSequenceFile(Dir, "f" + i, (sbyte)0, 2000); + string fileName = "f" + i; + Dir.Copy(cw, fileName, fileName, NewIOContext(Random())); + } + cw.Dispose(); + } + + [Test] + public virtual void TestReadAfterClose() + { + try + { + Demo_FSIndexInputBug(Dir, "test"); + } +#pragma warning disable 168 + catch (ObjectDisposedException ode) +#pragma warning restore 168 + { + // expected + } + } + + private void Demo_FSIndexInputBug(Directory fsdir, string file) + { + // Setup the test file - we need more than 1024 bytes + IndexOutput os = fsdir.CreateOutput(file, IOContext.DEFAULT); + for (int i = 0; i < 2000; i++) + { + os.WriteByte((byte)(sbyte)i); + } + os.Dispose(); + + IndexInput @in = fsdir.OpenInput(file, IOContext.DEFAULT); + + // this read primes the buffer in IndexInput + @in.ReadByte(); + + // Close the file + @in.Dispose(); + + // ERROR: this call should fail, but succeeds because the buffer + // is still filled + @in.ReadByte(); + + // ERROR: this call should fail, but succeeds for some reason as well + @in.Seek(1099); + + try + { + // OK: this call correctly fails. 
We are now past the 1024 internal + // buffer, so an actual IO is attempted, which fails + @in.ReadByte(); + Assert.Fail("expected readByte() to throw exception"); + } +#pragma warning disable 168 + catch (IOException e) +#pragma warning restore 168 + { + // expected exception + } + } + + [Test] + public virtual void TestClonedStreamsClosing() + { + SetUp_2(); + CompoundFileDirectory cr = new CompoundFileDirectory(Dir, "f.comp", NewIOContext(Random()), false); + + // basic clone + IndexInput expected = Dir.OpenInput("f11", NewIOContext(Random())); + + // this test only works for FSIndexInput + Assert.IsTrue(TestHelper.IsSimpleFSIndexInput(expected)); + Assert.IsTrue(TestHelper.IsSimpleFSIndexInputOpen(expected)); + + IndexInput one = cr.OpenInput("f11", NewIOContext(Random())); + + IndexInput two = (IndexInput)one.Clone(); + + AssertSameStreams("basic clone one", expected, one); + expected.Seek(0); + AssertSameStreams("basic clone two", expected, two); + + // Now close the first stream + one.Dispose(); + + // The following should really fail since we couldn't expect to + // access a file once close has been called on it (regardless of + // buffering and/or clone magic) + expected.Seek(0); + two.Seek(0); + AssertSameStreams("basic clone two/2", expected, two); + + // Now close the compound reader + cr.Dispose(); + + // The following may also fail since the compound stream is closed + expected.Seek(0); + two.Seek(0); + //assertSameStreams("basic clone two/3", expected, two); + + // Now close the second clone + two.Dispose(); + expected.Seek(0); + two.Seek(0); + //assertSameStreams("basic clone two/4", expected, two); + + expected.Dispose(); + } + + /// <summary> + /// this test opens two files from a compound stream and verifies that + /// their file positions are independent of each other. 
+ /// </summary> + [Test] + public virtual void TestRandomAccess() + { + SetUp_2(); + CompoundFileDirectory cr = new CompoundFileDirectory(Dir, "f.comp", NewIOContext(Random()), false); + + // Open two files + IndexInput e1 = Dir.OpenInput("f11", NewIOContext(Random())); + IndexInput e2 = Dir.OpenInput("f3", NewIOContext(Random())); + + IndexInput a1 = cr.OpenInput("f11", NewIOContext(Random())); + IndexInput a2 = Dir.OpenInput("f3", NewIOContext(Random())); + + // Seek the first pair + e1.Seek(100); + a1.Seek(100); + Assert.AreEqual(100, e1.FilePointer); + Assert.AreEqual(100, a1.FilePointer); + byte be1 = e1.ReadByte(); + byte ba1 = a1.ReadByte(); + Assert.AreEqual(be1, ba1); + + // Now seek the second pair + e2.Seek(1027); + a2.Seek(1027); + Assert.AreEqual(1027, e2.FilePointer); + Assert.AreEqual(1027, a2.FilePointer); + byte be2 = e2.ReadByte(); + byte ba2 = a2.ReadByte(); + Assert.AreEqual(be2, ba2); + + // Now make sure the first one didn't move + Assert.AreEqual(101, e1.FilePointer); + Assert.AreEqual(101, a1.FilePointer); + be1 = e1.ReadByte(); + ba1 = a1.ReadByte(); + Assert.AreEqual(be1, ba1); + + // Now more the first one again, past the buffer length + e1.Seek(1910); + a1.Seek(1910); + Assert.AreEqual(1910, e1.FilePointer); + Assert.AreEqual(1910, a1.FilePointer); + be1 = e1.ReadByte(); + ba1 = a1.ReadByte(); + Assert.AreEqual(be1, ba1); + + // Now make sure the second set didn't move + Assert.AreEqual(1028, e2.FilePointer); + Assert.AreEqual(1028, a2.FilePointer); + be2 = e2.ReadByte(); + ba2 = a2.ReadByte(); + Assert.AreEqual(be2, ba2); + + // Move the second set back, again cross the buffer size + e2.Seek(17); + a2.Seek(17); + Assert.AreEqual(17, e2.FilePointer); + Assert.AreEqual(17, a2.FilePointer); + be2 = e2.ReadByte(); + ba2 = a2.ReadByte(); + Assert.AreEqual(be2, ba2); + + // Finally, make sure the first set didn't move + // Now make sure the first one didn't move + Assert.AreEqual(1911, e1.FilePointer); + Assert.AreEqual(1911, a1.FilePointer); + be1 = e1.ReadByte(); + ba1 = a1.ReadByte(); + Assert.AreEqual(be1, ba1); + + e1.Dispose(); + e2.Dispose(); + a1.Dispose(); + a2.Dispose(); + cr.Dispose(); + } + + /// <summary> + /// this test opens two files from a compound stream and verifies that + /// their file positions are independent of each other. 
+ /// </summary> + [Test] + public virtual void TestRandomAccessClones() + { + SetUp_2(); + CompoundFileDirectory cr = new CompoundFileDirectory(Dir, "f.comp", NewIOContext(Random()), false); + + // Open two files + IndexInput e1 = cr.OpenInput("f11", NewIOContext(Random())); + IndexInput e2 = cr.OpenInput("f3", NewIOContext(Random())); + + IndexInput a1 = (IndexInput)e1.Clone(); + IndexInput a2 = (IndexInput)e2.Clone(); + + // Seek the first pair + e1.Seek(100); + a1.Seek(100); + Assert.AreEqual(100, e1.FilePointer); + Assert.AreEqual(100, a1.FilePointer); + byte be1 = e1.ReadByte(); + byte ba1 = a1.ReadByte(); + Assert.AreEqual(be1, ba1); + + // Now seek the second pair + e2.Seek(1027); + a2.Seek(1027); + Assert.AreEqual(1027, e2.FilePointer); + Assert.AreEqual(1027, a2.FilePointer); + byte be2 = e2.ReadByte(); + byte ba2 = a2.ReadByte(); + Assert.AreEqual(be2, ba2); + + // Now make sure the first one didn't move + Assert.AreEqual(101, e1.FilePointer); + Assert.AreEqual(101, a1.FilePointer); + be1 = e1.ReadByte(); + ba1 = a1.ReadByte(); + Assert.AreEqual(be1, ba1); + + // Now more the first one again, past the buffer length + e1.Seek(1910); + a1.Seek(1910); + Assert.AreEqual(1910, e1.FilePointer); + Assert.AreEqual(1910, a1.FilePointer); + be1 = e1.ReadByte(); + ba1 = a1.ReadByte(); + Assert.AreEqual(be1, ba1); + + // Now make sure the second set didn't move + Assert.AreEqual(1028, e2.FilePointer); + Assert.AreEqual(1028, a2.FilePointer); + be2 = e2.ReadByte(); + ba2 = a2.ReadByte(); + Assert.AreEqual(be2, ba2); + + // Move the second set back, again cross the buffer size + e2.Seek(17); + a2.Seek(17); + Assert.AreEqual(17, e2.FilePointer); + Assert.AreEqual(17, a2.FilePointer); + be2 = e2.ReadByte(); + ba2 = a2.ReadByte(); + Assert.AreEqual(be2, ba2); + + // Finally, make sure the first set didn't move + // Now make sure the first one didn't move + Assert.AreEqual(1911, e1.FilePointer); + Assert.AreEqual(1911, a1.FilePointer); + be1 = e1.ReadByte(); + ba1 = a1.ReadByte(); + Assert.AreEqual(be1, ba1); + + e1.Dispose(); + e2.Dispose(); + a1.Dispose(); + a2.Dispose(); + cr.Dispose(); + } + + [Test] + public virtual void TestFileNotFound() + { + SetUp_2(); + CompoundFileDirectory cr = new CompoundFileDirectory(Dir, "f.comp", NewIOContext(Random()), false); + + // Open two files + try + { + cr.OpenInput("bogus", NewIOContext(Random())); + Assert.Fail("File not found"); + } +#pragma warning disable 168 + catch (Exception e) +#pragma warning restore 168 + { + /* success */ + //System.out.println("SUCCESS: File Not Found: " + e); + } + + cr.Dispose(); + } + + [Test] + public virtual void TestReadPastEOF() + { + SetUp_2(); + var cr = new CompoundFileDirectory(Dir, "f.comp", NewIOContext(Random()), false); + IndexInput @is = cr.OpenInput("f2", NewIOContext(Random())); + @is.Seek(@is.Length - 10); + var b = new byte[100]; + @is.ReadBytes(b, 0, 10); + + try + { + @is.ReadByte(); + Assert.Fail("Single byte read past end of file"); + } +#pragma warning disable 168 + catch (IOException e) +#pragma warning restore 168 + { + /* success */ + //System.out.println("SUCCESS: single byte read past end of file: " + e); + } + + @is.Seek(@is.Length - 10); + try + { + @is.ReadBytes(b, 0, 50); + Assert.Fail("Block read past end of file"); + } +#pragma warning disable 168 + catch (IOException e) +#pragma warning restore 168 + { + /* success */ + //System.out.println("SUCCESS: block read past end of file: " + e); + } + + @is.Dispose(); + cr.Dispose(); + } + + /// <summary> + /// this test that writes larger than the 
size of the buffer output + /// will correctly increment the file pointer. + /// </summary> + [Test] + public virtual void TestLargeWrites() + { + IndexOutput os = Dir.CreateOutput("testBufferStart.txt", NewIOContext(Random())); + + var largeBuf = new byte[2048]; + for (int i = 0; i < largeBuf.Length; i++) + { + largeBuf[i] = (byte)unchecked((sbyte)(new Random(1).NextDouble() * 256)); + } + + long currentPos = os.FilePointer; + os.WriteBytes(largeBuf, largeBuf.Length); + + try + { + Assert.AreEqual(currentPos + largeBuf.Length, os.FilePointer); + } + finally + { + os.Dispose(); + } + } + + [Test] + public virtual void TestAddExternalFile() + { + CreateSequenceFile(Dir, "d1", (sbyte)0, 15); + + Directory newDir = NewDirectory(); + CompoundFileDirectory csw = new CompoundFileDirectory(newDir, "d.cfs", NewIOContext(Random()), true); + Dir.Copy(csw, "d1", "d1", NewIOContext(Random())); + csw.Dispose(); + + CompoundFileDirectory csr = new CompoundFileDirectory(newDir, "d.cfs", NewIOContext(Random()), false); + IndexInput expected = Dir.OpenInput("d1", NewIOContext(Random())); + IndexInput actual = csr.OpenInput("d1", NewIOContext(Random())); + AssertSameStreams("d1", expected, actual); + AssertSameSeekBehavior("d1", expected, actual); + expected.Dispose(); + actual.Dispose(); + csr.Dispose(); + + newDir.Dispose(); + } + + [Test] + public virtual void TestAppend() + { + Directory newDir = NewDirectory(); + CompoundFileDirectory csw = new CompoundFileDirectory(newDir, "d.cfs", NewIOContext(Random()), true); + int size = 5 + Random().Next(128); + for (int j = 0; j < 2; j++) + { + IndexOutput os = csw.CreateOutput("seg_" + j + "_foo.txt", NewIOContext(Random())); + for (int i = 0; i < size; i++) + { + os.WriteInt32(i * j); + } + os.Dispose(); + string[] listAll = newDir.ListAll(); + Assert.AreEqual(1, listAll.Length); + Assert.AreEqual("d.cfs", listAll[0]); + } + CreateSequenceFile(Dir, "d1", (sbyte)0, 15); + Dir.Copy(csw, "d1", "d1", NewIOContext(Random())); + string[] listAll_ = newDir.ListAll(); + Assert.AreEqual(1, listAll_.Length); + Assert.AreEqual("d.cfs", listAll_[0]); + csw.Dispose(); + CompoundFileDirectory csr = new CompoundFileDirectory(newDir, "d.cfs", NewIOContext(Random()), false); + for (int j = 0; j < 2; j++) + { + IndexInput openInput = csr.OpenInput("seg_" + j + "_foo.txt", NewIOContext(Random())); + Assert.AreEqual(size * 4, openInput.Length); + for (int i = 0; i < size; i++) + { + Assert.AreEqual(i * j, openInput.ReadInt32()); + } + + openInput.Dispose(); + } + IndexInput expected = Dir.OpenInput("d1", NewIOContext(Random())); + IndexInput actual = csr.OpenInput("d1", NewIOContext(Random())); + AssertSameStreams("d1", expected, actual); + AssertSameSeekBehavior("d1", expected, actual); + expected.Dispose(); + actual.Dispose(); + csr.Dispose(); + newDir.Dispose(); + } + + [Test] + public virtual void TestAppendTwice() + { + Directory newDir = NewDirectory(); + CompoundFileDirectory csw = new CompoundFileDirectory(newDir, "d.cfs", NewIOContext(Random()), true); + CreateSequenceFile(newDir, "d1", (sbyte)0, 15); + IndexOutput @out = csw.CreateOutput("d.xyz", NewIOContext(Random())); + @out.WriteInt32(0); + @out.Dispose(); + Assert.AreEqual(1, csw.ListAll().Length); + Assert.AreEqual("d.xyz", csw.ListAll()[0]); + + csw.Dispose(); + + CompoundFileDirectory cfr = new CompoundFileDirectory(newDir, "d.cfs", NewIOContext(Random()), false); + Assert.AreEqual(1, cfr.ListAll().Length); + Assert.AreEqual("d.xyz", cfr.ListAll()[0]); + cfr.Dispose(); + newDir.Dispose(); + } + + [Test] + public 
virtual void TestEmptyCFS() + { + Directory newDir = NewDirectory(); + CompoundFileDirectory csw = new CompoundFileDirectory(newDir, "d.cfs", NewIOContext(Random()), true); + csw.Dispose(); + + CompoundFileDirectory csr = new CompoundFileDirectory(newDir, "d.cfs", NewIOContext(Random()), false); + Assert.AreEqual(0, csr.ListAll().Length); + csr.Dispose(); + + newDir.Dispose(); + } + + [Test] + public virtual void TestReadNestedCFP() + { + Directory newDir = NewDirectory(); + CompoundFileDirectory csw = new CompoundFileDirectory(newDir, "d.cfs", NewIOContext(Random()), true); + CompoundFileDirectory nested = new CompoundFileDirectory(newDir, "b.cfs", NewIOContext(Random()), true); + IndexOutput @out = nested.CreateOutput("b.xyz", NewIOContext(Random())); + IndexOutput out1 = nested.CreateOutput("b_1.xyz", NewIOContext(Random())); + @out.WriteInt32(0); + out1.WriteInt32(1); + @out.Dispose(); + out1.Dispose(); + nested.Dispose(); + newDir.Copy(csw, "b.cfs", "b.cfs", NewIOContext(Random())); + newDir.Copy(csw, "b.cfe", "b.cfe", NewIOContext(Random())); + newDir.DeleteFile("b.cfs"); + newDir.DeleteFile("b.cfe"); + csw.Dispose(); + + Assert.AreEqual(2, newDir.ListAll().Length); + csw = new CompoundFileDirectory(newDir, "d.cfs", NewIOContext(Random()), false); + + Assert.AreEqual(2, csw.ListAll().Length); + nested = new CompoundFileDirectory(csw, "b.cfs", NewIOContext(Random()), false); + + Assert.AreEqual(2, nested.ListAll().Length); + IndexInput openInput = nested.OpenInput("b.xyz", NewIOContext(Random())); + Assert.AreEqual(0, openInput.ReadInt32()); + openInput.Dispose(); + openInput = nested.OpenInput("b_1.xyz", NewIOContext(Random())); + Assert.AreEqual(1, openInput.ReadInt32()); + openInput.Dispose(); + nested.Dispose(); + csw.Dispose(); + newDir.Dispose(); + } + + [Test] + public virtual void TestDoubleClose() + { + Directory newDir = NewDirectory(); + CompoundFileDirectory csw = new CompoundFileDirectory(newDir, "d.cfs", NewIOContext(Random()), true); + IndexOutput @out = csw.CreateOutput("d.xyz", NewIOContext(Random())); + @out.WriteInt32(0); + @out.Dispose(); + + csw.Dispose(); + // close a second time - must have no effect according to IDisposable + csw.Dispose(); + + csw = new CompoundFileDirectory(newDir, "d.cfs", NewIOContext(Random()), false); + IndexInput openInput = csw.OpenInput("d.xyz", NewIOContext(Random())); + Assert.AreEqual(0, openInput.ReadInt32()); + openInput.Dispose(); + csw.Dispose(); + // close a second time - must have no effect according to IDisposable + csw.Dispose(); + + newDir.Dispose(); + } + + // Make sure we don't somehow use more than 1 descriptor + // when reading a CFS with many subs: + [Test] + public virtual void TestManySubFiles() + { + Directory d = NewFSDirectory(CreateTempDir("CFSManySubFiles")); + int FILE_COUNT = AtLeast(500); + + for (int fileIdx = 0; fileIdx < FILE_COUNT; fileIdx++) + { + IndexOutput @out = d.CreateOutput("file." + fileIdx, NewIOContext(Random())); + @out.WriteByte((byte)(sbyte)fileIdx); + @out.Dispose(); + } + + CompoundFileDirectory cfd = new CompoundFileDirectory(d, "c.cfs", NewIOContext(Random()), true); + for (int fileIdx = 0; fileIdx < FILE_COUNT; fileIdx++) + { + string fileName = "file." 
+ fileIdx; + d.Copy(cfd, fileName, fileName, NewIOContext(Random())); + } + cfd.Dispose(); + + IndexInput[] ins = new IndexInput[FILE_COUNT]; + CompoundFileDirectory cfr = new CompoundFileDirectory(d, "c.cfs", NewIOContext(Random()), false); + for (int fileIdx = 0; fileIdx < FILE_COUNT; fileIdx++) + { + ins[fileIdx] = cfr.OpenInput("file." + fileIdx, NewIOContext(Random())); + } + + for (int fileIdx = 0; fileIdx < FILE_COUNT; fileIdx++) + { + Assert.AreEqual((byte)fileIdx, ins[fileIdx].ReadByte()); + } + + for (int fileIdx = 0; fileIdx < FILE_COUNT; fileIdx++) + { + ins[fileIdx].Dispose(); + } + cfr.Dispose(); + d.Dispose(); + } + + [Test] + public virtual void TestListAll() + { + Directory dir = NewDirectory(); + // riw should sometimes create docvalues fields, etc + RandomIndexWriter riw = new RandomIndexWriter(Random(), dir, Similarity, TimeZone); + Document doc = new Document(); + // these fields should sometimes get term vectors, etc + Field idField = NewStringField("id", "", Field.Store.NO); + Field bodyField = NewTextField("body", "", Field.Store.NO); + doc.Add(idField); + doc.Add(bodyField); + for (int i = 0; i < 100; i++) + { + idField.SetStringValue(Convert.ToString(i)); + bodyField.SetStringValue(TestUtil.RandomUnicodeString(Random())); + riw.AddDocument(doc); + if (Random().Next(7) == 0) + { + riw.Commit(); + } + } + riw.Dispose(); + CheckFiles(dir); + dir.Dispose(); + } + + // checks that we can open all files returned by listAll! + private void CheckFiles(Directory dir) + { + foreach (string file in dir.ListAll()) + { + if (file.EndsWith(IndexFileNames.COMPOUND_FILE_EXTENSION)) + { + CompoundFileDirectory cfsDir = new CompoundFileDirectory(dir, file, NewIOContext(Random()), false); + CheckFiles(cfsDir); // recurse into cfs + cfsDir.Dispose(); + } + IndexInput @in = null; + bool success = false; + try + { + @in = dir.OpenInput(file, NewIOContext(Random())); + success = true; + } + finally + { + if (success) + { + IOUtils.Close(@in); + } + else + { + IOUtils.CloseWhileHandlingException(@in); + } + } + } + } + } +} \ No newline at end of file
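The TestCompoundFile suite above revolves around one round trip: open a CompoundFileDirectory with openForWrite: true, create or copy sub-files into it, dispose it, then reopen with openForWrite: false and read the same bytes back. A minimal sketch of that round trip, assuming a RAMDirectory and a hypothetical sub-file name:

using System;
using Lucene.Net.Store;

public static class CompoundFileSketch
{
    public static void Main()
    {
        using (Directory dir = new RAMDirectory())
        {
            // openForWrite: true creates the .cfs (and companion .cfe) container.
            var csw = new CompoundFileDirectory(dir, "d.cfs", IOContext.DEFAULT, true);
            IndexOutput os = csw.CreateOutput("seg_0_foo.txt", IOContext.DEFAULT);
            for (int i = 0; i < 5; i++)
            {
                os.WriteInt32(i);
            }
            os.Dispose();
            csw.Dispose();

            // openForWrite: false opens the same container read-only.
            var csr = new CompoundFileDirectory(dir, "d.cfs", IOContext.DEFAULT, false);
            IndexInput @in = csr.OpenInput("seg_0_foo.txt", IOContext.DEFAULT);
            for (int i = 0; i < 5; i++)
            {
                Console.WriteLine(@in.ReadInt32()); // 0..4, in write order
            }
            @in.Dispose();
            csr.Dispose();
        }
    }
}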