Hello,
I have tried to switch my application from Lucene 2.4.1 to Lucene 2.9, but
I have run into a problem.
My searcher uses a MultiReader and, when I search with a custom filter
based on a bitset, it no longer behaves as it did in Lucene 2.4.1.
It looks like the new searcher does not apply the "offset" when it reads
the sub-readers' docIds...
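If I am reading the new code correctly, the 2.9 searcher now runs the
filter once per sub-reader instead of once against the whole MultiReader,
roughly like this (my own simplified sketch, not the actual Lucene source;
subReaders and docStarts are just illustrative names):

// My understanding of the 2.9 filtered search loop (illustrative only):
for (int i = 0; i < subReaders.length; i++) {
    // The filter is asked once per sub-reader, so it is apparently
    // expected to return *segment-local* docIds starting at 0...
    DocIdSet docIdSet = filter.getDocIdSet(subReaders[i]);
    // ...and the searcher adds docStarts[i] back when collecting hits.
}

My filter instead always returns top-level docIds, which would explain
the shifted results below.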
I have written a self-contained test to show the problem:
import static org.junit.Assert.assertEquals;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.OpenBitSet;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
public class Lucene_2_9SearcherTest {

    private Directory dir1 = new RAMDirectory();
    private Directory dir2 = new RAMDirectory();
    private Analyzer analyzer = new WhitespaceAnalyzer();

    @Before
    public void setUp() throws Exception {
        this.createIndex1();
        this.createIndex2();
    }

    @After
    public void tearDown() throws Exception {
    }
    @Test
    public void testSearchWithMultiReader() throws CorruptIndexException,
            IOException {
        IndexReader reader = this.getMultiReader();

        // Top-level docIds: 1 and 2 ("b", "c") live in the first
        // sub-index; 6 ("y") is doc 1 of the second sub-index.
        OpenBitSet bitSet = new OpenBitSet(10);
        bitSet.fastSet(1);
        bitSet.fastSet(2);
        bitSet.fastSet(6);
        Filter filter = new DocIdSetFilter(bitSet);

        // Iterating the filter directly against the MultiReader
        // gives the expected three documents.
        // (next()/doc() are deprecated in 2.9, but still work here.)
        DocIdSetIterator docIdIt = filter.getDocIdSet(reader).iterator();
        int numDocs = 0;
        System.out.println("Filter extraction:");
        while (docIdIt.next()) {
            System.out.println("Extracted: " + docIdIt.doc() + " --> "
                    + reader.document(docIdIt.doc()).getField("text").stringValue());
            numDocs++;
        }
        assertEquals(3, numDocs);

        // Searching with the same filter behaves differently in 2.9.
        IndexSearcher searcher = new IndexSearcher(reader);
        TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), filter, 10);
        int totSearchDocs = topDocs.totalHits;
        // assertEquals(3, totSearchDocs); // fails on 2.9: totalHits is 4
        ScoreDoc[] hits = topDocs.scoreDocs;
        System.out.println("\nSearcher extraction:");
        for (ScoreDoc sd : hits) {
            System.out.println("Extracted: " + sd.doc + " --> "
                    + reader.document(sd.doc).getField("text").stringValue());
        }
    }
    private void createIndex1() throws CorruptIndexException,
            LockObtainFailedException, IOException {
        IndexWriter writer = new IndexWriter(dir1, analyzer, true,
                MaxFieldLength.UNLIMITED);
        // These become docs 0..4 of the MultiReader.
        for (String value : new String[] { "a", "b", "c", "d", "e" }) {
            Document doc = new Document();
            doc.add(new Field("text", value, Field.Store.YES,
                    Field.Index.NOT_ANALYZED));
            writer.addDocument(doc);
        }
        writer.optimize();
        writer.close();
    }
    private void createIndex2() throws CorruptIndexException,
            LockObtainFailedException, IOException {
        IndexWriter writer = new IndexWriter(dir2, analyzer, true,
                MaxFieldLength.UNLIMITED);
        // These become docs 5..7 of the MultiReader.
        for (String value : new String[] { "x", "y", "z" }) {
            Document doc = new Document();
            doc.add(new Field("text", value, Field.Store.YES,
                    Field.Index.NOT_ANALYZED));
            writer.addDocument(doc);
        }
        writer.optimize();
        writer.close();
    }
    private IndexReader getMultiReader() throws CorruptIndexException,
            IOException {
        IndexReader[] subReaders = new IndexReader[] {
                IndexReader.open(dir1, false), IndexReader.open(dir2, false) };
        return new MultiReader(subReaders);
    }
    private class DocIdSetFilter extends Filter {

        private static final long serialVersionUID = 1L;

        private DocIdSet myBitset;

        public DocIdSetFilter(DocIdSet bitset) {
            this.myBitset = bitset;
        }

        // Always returns the same top-level DocIdSet, no matter
        // which reader the searcher passes in.
        @Override
        public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
            return this.myBitset;
        }
    }
}
In Lucene 2.4.1 the output is:
Filter extraction:
Extracted: 1 --> b
Extracted: 2 --> c
Extracted: 6 --> y
Searcher extraction:
Extracted: 1 --> b
Extracted: 2 --> c
Extracted: 6 --> y
while in Lucene 2.9 I get:
Filter extraction:
Extracted: 1 --> b
Extracted: 2 --> c
Extracted: 6 --> y
Searcher extraction:
Extracted: 1 --> b
Extracted: 2 --> c
Extracted: 6 --> y
Extracted: 7 --> z
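If the per-segment behaviour is intentional, I suppose the filter has to
translate the top-level bits into segment-local ones itself. Something
like this is what I have in mind (untested sketch; OffsetDocIdSetFilter
and its docBase computation via getSequentialSubReaders() are just my
guess at the new contract):

private class OffsetDocIdSetFilter extends Filter {

    private static final long serialVersionUID = 1L;

    private final OpenBitSet topLevelBits;
    private final IndexReader topReader;

    public OffsetDocIdSetFilter(OpenBitSet topLevelBits, IndexReader topReader) {
        this.topLevelBits = topLevelBits;
        this.topReader = topReader;
    }

    @Override
    public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
        // Find the offset ("docBase") of this sub-reader inside
        // the top-level MultiReader.
        int docBase = 0;
        for (IndexReader sub : topReader.getSequentialSubReaders()) {
            if (sub == reader) {
                break;
            }
            docBase += sub.maxDoc();
        }
        // Copy this sub-reader's slice of the top-level bitset,
        // shifted down to segment-local docIds.
        OpenBitSet localBits = new OpenBitSet(reader.maxDoc());
        for (int i = 0; i < reader.maxDoc(); i++) {
            if (topLevelBits.get(docBase + i)) {
                localBits.fastSet(i);
            }
        }
        return localBits;
    }
}

But I would rather understand whether this is really the intended
contract before relying on it.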
Is this a bug in the new Lucene searcher, or am I missing something?
Thanks,
Bye
Raf