There is still a bug in IndexWriter (or DocumentsWriter, or ...).
Here is a simplified version of TestIndexWriter/TestStressIndexing2.
DIGY
using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using Lucene.Net.Search;
using Lucene.Net.Index;
using Lucene.Net.Documents;
using Lucene.Net.Store;
using LuceneTestCase = Lucene.Net.Util.LuceneTestCase;
using Lucene.Net.Analysis;
using NUnit.Framework;
namespace Lucene.Net
{
[TestFixture]
public class _DIGY
{
// Seeded with a constant so every run generates the same sequence of random documents.
Random rand = new Random(12345678);
/// <summary>
/// Writes 1000 randomly generated documents through an <c>IndexWriter</c>
/// (each one replacing any earlier document with the same "id"), then
/// reopens the index and checks that the number of live documents and the
/// field count of every live document match the copies kept in a dictionary.
/// </summary>
[Test]
public void AnotherIndexWriterTest()
{
    Dictionary<string, Document> docs = new Dictionary<string, Document>();

    // Create random docs, add them to the index, and also store them in the dictionary "docs".
    RAMDirectory dir = new RAMDirectory();
    IndexWriter w = new IndexWriter(dir, false, new WhitespaceAnalyzer(), true);
    // Small buffers and a low merge factor force frequent flushes and merges.
    w.SetMergeFactor(3);
    w.SetRAMBufferSizeMB(.1);
    w.SetMaxBufferedDocs(3);
    w.SetMaxFieldLength(int.MaxValue);
    w.SetUseCompoundFile(false);
    for (int i = 0; i < 1000; i++)
    {
        IndexDocs(w, docs);
    }
    w.Close();

    // Compare docs in the index with those in the dictionary.
    Lucene.Net.Search.IndexSearcher searcher = new Lucene.Net.Search.IndexSearcher(dir);
    try
    {
        IndexReader r = searcher.Reader;

        // NUnit convention: expected value first, actual value second.
        Assert.AreEqual(docs.Count, r.NumDocs(), "Doc# mismatch");

        for (int i = 0; i < r.MaxDoc(); i++)
        {
            if (!r.IsDeleted(i))
            {
                Document docInIndex = r.Document(i);
                Document docInDict = docs[docInIndex.GetField("id").StringValue()];

                // Dump both documents before failing so the mismatch can be inspected.
                if (docInDict.GetFields().Count != docInIndex.GetFields().Count)
                {
                    WriteDoc("Doc In Index ", docInIndex);
                    WriteDoc("Doc In HashTable ", docInDict);
                }

                Assert.AreEqual(docInDict.GetFields().Count,
                    docInIndex.GetFields().Count, "Field# mismatch");
            }
        }
    }
    finally
    {
        // Release the searcher even when an assertion above fails.
        searcher.Close();
    }
}
/// <summary>
/// Builds one random document, writes it to the index with
/// <c>UpdateDocument</c> keyed on its "id" term, and records the same
/// document in <paramref name="docs"/> under that id.
/// </summary>
/// <param name="w">Writer that receives the document.</param>
/// <param name="docs">Dictionary mirroring the expected index content, keyed by document id.</param>
void IndexDocs(Lucene.Net.Index.IndexWriter w, Dictionary<string,
Document> docs)
{
    Document d = new Document();

    // Only 10 distinct ids, so most calls replace an existing document.
    System.String id = rand.Next(10).ToString();
    Field idField = new Field("id", id, Field.Store.YES, Field.Index.NO_NORMS);
    d.Add(idField);

    int nFields = rand.Next(10) + 1;
    for (int i = 0; i < nFields; i++)
    {
        // The fields are stored: a document read back from the index contains
        // only its stored fields, so with Field.Store.NO the caller's
        // field-count comparison against the dictionary copy could never match.
        d.Add(new Field("f" + i, rand.Next(100).ToString(),
            Field.Store.YES, Field.Index.TOKENIZED,
            TermVectorType[rand.Next(TermVectorType.Length)]));
    }

    // Add the doc to the index (replacing any previous doc with this id)...
    w.UpdateDocument(new Term("id", id), d);
    // ...and also store it in the dictionary.
    docs[id] = d;
}
// Term-vector settings from which IndexDocs picks one at random for each generated field.
Field.TermVector[] TermVectorType =
{
    Field.TermVector.NO,
    Field.TermVector.YES,
    Field.TermVector.WITH_POSITIONS,
    Field.TermVector.WITH_POSITIONS_OFFSETS
};
// DEBUG helper: prints the document's id, then every field's name, value
// and indexing/storage/term-vector flags to the console.
void WriteDoc(string Prompt, Document d)
{
    Console.WriteLine(Prompt + " DOCID: " + d.GetField("id").StringValue());

    foreach (Field field in d.GetFields())
    {
        // Collect the flag lines first, then emit them after the name:value header.
        StringBuilder flags = new StringBuilder();
        flags.Append("\n\t\tIsIndexed:").Append(field.IsIndexed().ToString());
        flags.Append("\n\t\tIsStored:").Append(field.IsStored().ToString());
        flags.Append("\n\t\tIsTokenized:").Append(field.IsTokenized().ToString());
        flags.Append("\n\t\tIsTermVectorStored:").Append(field.IsTermVectorStored().ToString());
        flags.Append("\n\t\tIsStoreOffsetWithTermVector:").Append(field.IsStoreOffsetWithTermVector().ToString());
        flags.Append("\n\t\tIsStorePositionWithTermVector:").Append(field.IsStorePositionWithTermVector().ToString());

        Console.WriteLine("\t" + field.Name() + ":" + field.StringValue() + flags.ToString());
    }
}
}
}