IndexWriter implements the method docCount() which reads the number
of documents from the SegmentInfos of the index. However, it delivers
incorrect values if documents get deleted from the index. The reason for
this is that SegmentInfo.docCounts are updated in an incorrect way when
segments get merged. The new value is taken from the old SegmentInfos.
It would be better to take the value from the reader instead. In this
way indexWriter.docCount() would deliver the same value as
indexReader.maxDoc().
A test and a patch are attached.
Christoph
--
*****************************************************************
* Dr. Christoph Goller Tel.: +49 89 203 45734 *
* Detego Software GmbH Mobile: +49 179 1128469 *
* Keuslinstr. 13 Fax.: +49 721 151516176 *
* 80798 München, Germany Email: [EMAIL PROTECTED] *
*****************************************************************
Index: IndexWriter.java
===================================================================
RCS file:
/home/cvspublic/jakarta-lucene/src/java/org/apache/lucene/index/IndexWriter.java,v
retrieving revision 1.14
diff -u -r1.14 IndexWriter.java
--- IndexWriter.java 12 Aug 2003 15:05:03 -0000 1.14
+++ IndexWriter.java 3 Sep 2003 14:55:33 -0000
@@ -355,7 +355,7 @@
if ((reader.directory == this.directory) || // if we own the directory
(reader.directory == this.ramDirectory))
segmentsToDelete.addElement(reader); // queue segment for deletion
- mergedDocCount += si.docCount;
+ mergedDocCount += reader.numDocs();
}
if (infoStream != null) {
infoStream.println();
import java.io.IOException;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
/*
* Created on 03.09.2003
*
* To change the template for this generated file go to
* Window>Preferences>Java>Code Generation>Code and Comments
*/
/**
 * Stand-alone program that prints {@code IndexWriter.docCount()} before and
 * after documents are deleted and the index is optimized.
 *
 * <p>The sequence is: create an index in a {@code RAMDirectory}, add 100
 * documents, delete documents 0-49 through an {@code IndexReader}, reopen the
 * index with a new {@code IndexWriter} and call {@code optimize()}. The value
 * of {@code docCount()} is printed after the additions, after reopening, and
 * after optimizing, so the three values can be compared by hand.
 *
 * @author goller
 */
public class IndexWriterDocCountTest {

  /**
   * Number of documents successfully added so far; also used to build the
   * "id" field of the next document.
   */
  int docCount = 0;

  /**
   * Adds one document with a keyword field "id" (value "id" + current
   * counter) and an unstored field "content" (value "aaa").
   *
   * <p>An {@link IOException} from the writer is reported on standard error
   * and not propagated. The counter is advanced only when the document was
   * actually added, so it never runs ahead of the index content.
   *
   * @param writer the writer the document is added to
   */
  void addDoc(IndexWriter writer) {
    Document doc = new Document();
    doc.add(Field.Keyword("id", "id" + docCount));
    doc.add(Field.UnStored("content", "aaa"));
    try {
      writer.addDocument(doc);
      docCount++;
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

  /**
   * Runs the add / delete / optimize sequence and prints the document counts.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    Directory dir = new RAMDirectory();
    IndexWriterDocCountTest test = new IndexWriterDocCountTest();
    IndexWriter writer = null;
    IndexReader reader = null;
    int i;
    try {
      // Build a fresh index holding 100 documents.
      writer = new IndexWriter(dir, new WhitespaceAnalyzer(), true);
      for (i = 0; i < 100; i++) {
        test.addDoc(writer);
      }
      System.out.println("docCount: " + writer.docCount());
      writer.close();
      writer = null; // closed normally; nothing left for the finally block

      // Delete the first half of the documents through a reader.
      reader = IndexReader.open(dir);
      for (i = 0; i < 50; i++) {
        reader.delete(i);
      }
      reader.close();
      reader = null; // closed normally; nothing left for the finally block
      System.out.println("doc #0-49 deleted");

      // Reopen the existing index and compare docCount() around optimize().
      writer = new IndexWriter(dir, new WhitespaceAnalyzer(), false);
      System.out.println("docCount: " + writer.docCount());
      writer.optimize();
      System.out.println("optimized called");
      System.out.println("docCount: " + writer.docCount());
      writer.close();
      writer = null;
    } catch (IOException e) {
      e.printStackTrace();
    } finally {
      // A reference is still non-null here only if an exception interrupted
      // the sequence above; release whatever was left open.
      if (reader != null) {
        try {
          reader.close();
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
      if (writer != null) {
        try {
          writer.close();
        } catch (IOException e) {
          e.printStackTrace();
        }
      }
    }
  }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]