Hi Yann-Erwan, Thank you for the detailed reply. Your idea seems reasonable. I will give it a try for our environment settings.
Wei On Tue, Mar 26, 2013 at 5:22 PM, Yann-Erwan Perio <ye.pe...@gmail.com> wrote: > On Sun, Mar 24, 2013 at 10:46 AM, Wei Wang <welshw...@gmail.com> wrote: > > Hi, > >> For example, assume we have fields F1 and F2, we would like to find >> all documents with condition F1+F2 > 5.0. This filter may be combined >> with other filters to form a BooleanFilter. >> >> The question is, is there any way to construct an efficient filter to do >> this? > > I don't know - but the API looked interesting, so I gave it a try (see > below). I had never worked with search filters before writing that > code, so please proceed with caution, as I am not sure of many things > (iteration of all documents, treatment of deleted documents, what is > that "acceptDocs" variable, what threading constraints to respect...). > > --- > // add your package declaration > > > import static org.junit.Assert.assertEquals; > import static org.junit.Assert.assertTrue; > > import java.io.IOException; > import java.util.Arrays; > import java.util.HashSet; > import java.util.Iterator; > import java.util.Set; > import java.util.TreeSet; > > import org.apache.lucene.analysis.Analyzer; > import org.apache.lucene.analysis.standard.StandardAnalyzer; > import org.apache.lucene.document.Document; > import org.apache.lucene.document.Field; > import org.apache.lucene.document.IntField; > import org.apache.lucene.index.AtomicReader; > import org.apache.lucene.index.AtomicReaderContext; > import org.apache.lucene.index.DirectoryReader; > import org.apache.lucene.index.IndexReader; > import org.apache.lucene.index.IndexWriter; > import org.apache.lucene.index.IndexWriterConfig; > import org.apache.lucene.search.DocIdSet; > import org.apache.lucene.search.DocIdSetIterator; > import org.apache.lucene.search.FieldCache; > import org.apache.lucene.search.FieldCache.Ints; > import org.apache.lucene.search.Filter; > import org.apache.lucene.search.IndexSearcher; > import org.apache.lucene.search.MatchAllDocsQuery; > import 
org.apache.lucene.search.TopDocs; > import org.apache.lucene.store.Directory; > import org.apache.lucene.store.RAMDirectory; > import org.apache.lucene.util.Bits; > import org.apache.lucene.util.Version; > import org.junit.Before; > import org.junit.Test; > > public class FilterTest { > > private static final Version VERSION = Version.LUCENE_42; > > private static final String FIELD_ID = "id"; > private static final String FIELD_ALPHA = "alpha"; > private static final String FIELD_OMEGA = "omega"; > > private static final int SUM_THRESHOLD = 5; > private static final int[] VALUES_ALPHA = new int[] { 1, 2, 3, 4, 5 }; > private static final int[] VALUES_OMEGA = new int[] { 5, 0, 5, 0, 5 }; > private static final Set<Integer> EXPECTED_MATCHED_DOCUMENT_IDS = new > HashSet<Integer>(Arrays.asList(0, 2, 4)); > > private Directory directory; > > @Before > public void setUp() throws IOException { > directory = new RAMDirectory(); > > Analyzer analyzer = new StandardAnalyzer(VERSION); > IndexWriterConfig config = new IndexWriterConfig(VERSION, > analyzer); > config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); > IndexWriter writer = new IndexWriter(directory, config); > > for (int ii = 0; ii < VALUES_ALPHA.length; ii++) { > Document doc = new Document(); > Field id = new IntField(FIELD_ID, ii, > IntField.Store.YES); > Field alpha = new IntField(FIELD_ALPHA, > VALUES_ALPHA[ii], > IntField.Store.YES); > Field omega = new IntField(FIELD_OMEGA, > VALUES_OMEGA[ii], > IntField.Store.YES); > doc.add(id); > doc.add(alpha); > doc.add(omega); > writer.addDocument(doc); > } > > writer.close(); > } > > @Test > public void testSumFilter() throws IOException { > IndexReader reader = DirectoryReader.open(directory); > IndexSearcher searcher = new IndexSearcher(reader); > TopDocs results = searcher.search(new MatchAllDocsQuery(), new > SumFilter(SUM_THRESHOLD), VALUES_ALPHA.length); > > try { > assertEquals(EXPECTED_MATCHED_DOCUMENT_IDS.size(), > results.totalHits); > for (int ii = 0; ii 
< results.scoreDocs.length; ii++) > { > int docId = results.scoreDocs[ii].doc; > Document doc = reader.document(docId); > int idValue = > doc.getField(FIELD_ID).numericValue().intValue(); > int alphaValue = > doc.getField(FIELD_ALPHA).numericValue().intValue(); > int omegaValue = > doc.getField(FIELD_OMEGA).numericValue().intValue(); > > > assertTrue(EXPECTED_MATCHED_DOCUMENT_IDS.contains(idValue)); > assertTrue(alphaValue + omegaValue > > SUM_THRESHOLD); > } > } finally { > reader.close(); > } > } > > private class SumFilter extends Filter { > > private int minValue; > > public SumFilter(int minValue) { > this.minValue = minValue; > } > > @Override > public DocIdSet getDocIdSet(AtomicReaderContext context, Bits > acceptDocs) throws IOException { > AtomicReader reader = context.reader(); > Ints alphaCache = FieldCache.DEFAULT.getInts(reader, > FIELD_ALPHA, false); > Ints omegaCache = FieldCache.DEFAULT.getInts(reader, > FIELD_OMEGA, false); > SimpleDocIdSet docIdSet = new SimpleDocIdSet(); > > int maxDoc = reader.maxDoc(); > for (int docId = 0; docId < maxDoc; docId++) { > int sum = alphaCache.get(docId) + > omegaCache.get(docId); > if (sum > minValue) { > docIdSet.add(docId); > } > } > > return docIdSet; > } > } > > private class SimpleDocIdSet extends DocIdSet { > > private final TreeSet<Integer> sortedDocIdSet = new > TreeSet<Integer>(); > > public void add(int docId) { > sortedDocIdSet.add(docId); > } > > @Override > public DocIdSetIterator iterator() throws IOException { > return new DocIdSetIterator() { > > private Iterator<Integer> > sortedDocIdSetIterator = > sortedDocIdSet.iterator(); > private int currentDocId = -1; > > @Override > public int advance(int target) throws > IOException { > while ((currentDocId = nextDoc()) < > target) { > } > return currentDocId; > } > > @Override > public int docID() { > if (currentDocId == -1) { > return -1; > } > if > (!sortedDocIdSetIterator.hasNext()) { > return NO_MORE_DOCS; > } > return currentDocId; > } > > @Override 
> public int nextDoc() throws IOException { > if > (!sortedDocIdSetIterator.hasNext()) { > return NO_MORE_DOCS; > } > currentDocId = > sortedDocIdSetIterator.next(); > return currentDocId; > } > }; > } > } > > } > > --- > > Regards, > Yep. > > --------------------------------------------------------------------- > To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org > For additional commands, e-mail: java-user-h...@lucene.apache.org > --------------------------------------------------------------------- To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org For additional commands, e-mail: java-user-h...@lucene.apache.org