[
https://issues.apache.org/jira/browse/LUCENE-3442?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Uwe Schindler updated LUCENE-3442:
----------------------------------
Description:
If you try to get the iterator for the DocIdSet returned by a
QueryWrapperFilter which wraps a TermQuery you get null instead of an iterator
that returns the same documents as the search on the TermQuery.
Code demonstrating the issue:
{code:java}
import java.io.IOException;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
/**
 * Stand-alone reproduction for LUCENE-3442.
 *
 * Indexes a single document into a RAMDirectory, runs a TermQuery through an
 * IndexSearcher, then wraps the same TermQuery in a QueryWrapperFilter and asks
 * the filter's DocIdSet for an iterator. The program prints the stored "id" of
 * every document the iterator returns, or a message when the iterator is null.
 */
public class TestQueryWrapperFilterIterator {
/**
 * Runs the reproduction. An IOException raised anywhere in the sequence is
 * caught and its stack trace printed.
 */
public static void main(String[] args) {
try {
// Build an in-memory index, opened in CREATE mode, with a whitespace analyzer.
IndexWriterConfig iwconfig = new
IndexWriterConfig(Version.LUCENE_34, new WhitespaceAnalyzer(Version.LUCENE_34));
iwconfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
RAMDirectory dir = new RAMDirectory();
IndexWriter writer = new IndexWriter(dir, iwconfig);
// One document with three stored fields; only "text" is indexed as ANALYZED.
Document d = new Document();
d.add(new Field("id", "1001", Store.YES,
Index.NOT_ANALYZED));
d.add(new Field("text", "headline one group one",
Store.YES, Index.ANALYZED));
d.add(new Field("group", "grp1", Store.YES,
Index.NOT_ANALYZED));
writer.addDocument(d);
writer.commit();
writer.close();
IndexReader rdr = IndexReader.open(dir);
IndexSearcher searcher = new IndexSearcher(rdr);
TermQuery tq = new TermQuery(new Term("text",
"headline"));
// Baseline: run the TermQuery through the searcher and print results.totalHits.
TopDocs results = searcher.search(tq, 5);
System.out.println("Number of search results: " +
results.totalHits);
// Same query, now used as a filter. The reader passed to getDocIdSet is the
// one returned by IndexReader.open(dir), i.e. the top-level reader; according
// to the issue discussion this is the non-atomic case that triggers the bug.
Filter f = new QueryWrapperFilter(tq);
DocIdSet dis = f.getDocIdSet(rdr);
DocIdSetIterator it = dis.iterator();
if (it != null) {
// Walk the iterator until NO_MORE_DOCS, printing each document's stored "id".
int docId = it.nextDoc();
while (docId != DocIdSetIterator.NO_MORE_DOCS) {
Document doc = rdr.document(docId);
System.out.println("Iterator doc: " +
doc.get("id"));
docId = it.nextDoc();
}
} else {
// This is the outcome the issue reports: a null iterator.
System.out.println("Iterator was null: ");
}
// NOTE(review): these close() calls sit inside the try with no finally, so
// they are skipped if an IOException is thrown earlier.
searcher.close();
rdr.close();
} catch (IOException ioe) {
ioe.printStackTrace();
}
}
}
{code}
was:
If you try to get the iterator for the DocIdSet returned by a
QueryWrapperFilter which wraps a TermQuery you get null instead of an iterator
that returns the same documents as the search on the TermQuery.
Code demonstrating the issue:
import java.io.IOException;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
/**
 * Stand-alone reproduction for LUCENE-3442.
 *
 * Indexes a single document into a RAMDirectory, runs a TermQuery through an
 * IndexSearcher, then wraps the same TermQuery in a QueryWrapperFilter and asks
 * the filter's DocIdSet for an iterator. The program prints the stored "id" of
 * every document the iterator returns, or a message when the iterator is null.
 */
public class TestQueryWrapperFilterIterator {
/**
 * Runs the reproduction. An IOException raised anywhere in the sequence is
 * caught and its stack trace printed.
 */
public static void main(String[] args) {
try {
// Build an in-memory index, opened in CREATE mode, with a whitespace analyzer.
IndexWriterConfig iwconfig = new
IndexWriterConfig(Version.LUCENE_34, new WhitespaceAnalyzer(Version.LUCENE_34));
iwconfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
RAMDirectory dir = new RAMDirectory();
IndexWriter writer = new IndexWriter(dir, iwconfig);
// One document with three stored fields; only "text" is indexed as ANALYZED.
Document d = new Document();
d.add(new Field("id", "1001", Store.YES,
Index.NOT_ANALYZED));
d.add(new Field("text", "headline one group one",
Store.YES, Index.ANALYZED));
d.add(new Field("group", "grp1", Store.YES,
Index.NOT_ANALYZED));
writer.addDocument(d);
writer.commit();
writer.close();
IndexReader rdr = IndexReader.open(dir);
IndexSearcher searcher = new IndexSearcher(rdr);
TermQuery tq = new TermQuery(new Term("text",
"headline"));
// Baseline: run the TermQuery through the searcher and print results.totalHits.
TopDocs results = searcher.search(tq, 5);
System.out.println("Number of search results: " +
results.totalHits);
// Same query, now used as a filter. The reader passed to getDocIdSet is the
// one returned by IndexReader.open(dir), i.e. the top-level reader; according
// to the issue discussion this is the non-atomic case that triggers the bug.
Filter f = new QueryWrapperFilter(tq);
DocIdSet dis = f.getDocIdSet(rdr);
DocIdSetIterator it = dis.iterator();
if (it != null) {
// Walk the iterator until NO_MORE_DOCS, printing each document's stored "id".
int docId = it.nextDoc();
while (docId != DocIdSetIterator.NO_MORE_DOCS) {
Document doc = rdr.document(docId);
System.out.println("Iterator doc: " +
doc.get("id"));
docId = it.nextDoc();
}
} else {
// This is the outcome the issue reports: a null iterator.
System.out.println("Iterator was null: ");
}
// NOTE(review): these close() calls sit inside the try with no finally, so
// they are skipped if an IOException is thrown earlier.
searcher.close();
rdr.close();
} catch (IOException ioe) {
ioe.printStackTrace();
}
}
}
Fix Version/s: 3.5
The issue lies in the fact that an optimization in TermQuery prevents its
Weight.scorer() method from behaving correctly when a non-atomic reader is passed in.
This is no longer supported in Lucene trunk, but in 3.x the weight should still
be able to work on composite readers. The sample code provided does exactly
this: it calls QWF.getDocIdSet on a non-atomic IndexReader. QWF calls
TermWeight.scorer() and this one returns null, because the composite reader is
not in its DF cache.
The fix is easy: Don't early exit in scorer() if the reader passed in is not
atomic.
> QueryWrapperFilter gets null DocIdSetIterator when wrapping TermQuery
> ---------------------------------------------------------------------
>
> Key: LUCENE-3442
> URL: https://issues.apache.org/jira/browse/LUCENE-3442
> Project: Lucene - Java
> Issue Type: Bug
> Components: core/search
> Affects Versions: 3.4
> Environment: java 1.6.0_27
> Reporter: Dan
> Assignee: Uwe Schindler
> Priority: Minor
> Fix For: 3.5
>
>
> If you try to get the iterator for the DocIdSet returned by a
> QueryWrapperFilter which wraps a TermQuery you get null instead of an
> iterator that returns the same documents as the search on the TermQuery.
> Code demonstrating the issue:
> {code:java}
> import java.io.IOException;
> import org.apache.lucene.analysis.WhitespaceAnalyzer;
> import org.apache.lucene.document.Document;
> import org.apache.lucene.document.Field;
> import org.apache.lucene.document.Field.Index;
> import org.apache.lucene.document.Field.Store;
> import org.apache.lucene.index.IndexReader;
> import org.apache.lucene.index.IndexWriter;
> import org.apache.lucene.index.IndexWriterConfig;
> import org.apache.lucene.index.Term;
> import org.apache.lucene.store.RAMDirectory;
> import org.apache.lucene.util.Version;
> import org.apache.lucene.search.DocIdSet;
> import org.apache.lucene.search.DocIdSetIterator;
> import org.apache.lucene.search.Filter;
> import org.apache.lucene.search.IndexSearcher;
> import org.apache.lucene.search.QueryWrapperFilter;
> import org.apache.lucene.search.TermQuery;
> import org.apache.lucene.search.TopDocs;
> public class TestQueryWrapperFilterIterator {
> public static void main(String[] args) {
> try {
> IndexWriterConfig iwconfig = new
> IndexWriterConfig(Version.LUCENE_34, new
> WhitespaceAnalyzer(Version.LUCENE_34));
> iwconfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
> RAMDirectory dir = new RAMDirectory();
>
> IndexWriter writer = new IndexWriter(dir, iwconfig);
> Document d = new Document();
> d.add(new Field("id", "1001", Store.YES,
> Index.NOT_ANALYZED));
> d.add(new Field("text", "headline one group one",
> Store.YES, Index.ANALYZED));
> d.add(new Field("group", "grp1", Store.YES,
> Index.NOT_ANALYZED));
> writer.addDocument(d);
> writer.commit();
> writer.close();
>
> IndexReader rdr = IndexReader.open(dir);
> IndexSearcher searcher = new IndexSearcher(rdr);
>
> TermQuery tq = new TermQuery(new Term("text",
> "headline"));
>
> TopDocs results = searcher.search(tq, 5);
> System.out.println("Number of search results: " +
> results.totalHits);
>
> Filter f = new QueryWrapperFilter(tq);
>
> DocIdSet dis = f.getDocIdSet(rdr);
>
> DocIdSetIterator it = dis.iterator();
> if (it != null) {
> int docId = it.nextDoc();
> while (docId != DocIdSetIterator.NO_MORE_DOCS) {
> Document doc = rdr.document(docId);
> System.out.println("Iterator doc: " +
> doc.get("id"));
> docId = it.nextDoc();
> }
> } else {
> System.out.println("Iterator was null: ");
> }
>
> searcher.close();
> rdr.close();
> } catch (IOException ioe) {
> ioe.printStackTrace();
> }
> }
> }
> {code}
--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]