ToChildBlockJoinQuery question

McKinley, James T Wed, 21 Jan 2015 07:03:38 -0800

Hi,

I'm attempting to use ToChildBlockJoinQuery in Lucene 4.8.1 by following Mike 
McCandless' blog post:


http://blog.mikemccandless.com/2012/01/searching-relational-content-with.html

I have a set of child documents which are named works and a set of parent 
documents which are named persons that are the creators of the named works.  
The parent document has a nationality and the child document does not.  I want 
to query the children (named works) limiting by the nationality of the parent 
(named person).  I've indexed the documents as follows (I'm pulling the docs 
from an existing index):

        private void createNamedWorkIndex(String srcIndexPath, String 
destIndexPath) throws IOException {
                FSDirectory srcDir = FSDirectory.open(new File(srcIndexPath));
                FSDirectory destDir = FSDirectory.open(new File(destIndexPath));
                
                IndexReader reader = DirectoryReader.open(srcDir);
                
                Version version = Version.LUCENE_48;
                IndexWriterConfig conf = new IndexWriterConfig(version, new 
StandardTextAnalyzer(version));
                
                Set<String> crids = getCreatorIds(reader);
                
                String[] crida = crids.toArray(new String[crids.size()]);
                
                int numThreads = 24;
                ExecutorService executor = 
Executors.newFixedThreadPool(numThreads);
                
                int numCrids = crids.size();
                int batchSize = numCrids / numThreads;
                int remainder = numCrids % numThreads;
                
                System.out.println("Inserting work/creator blocks using " + 
numThreads + " threads...");
                try (IndexWriter writer = new IndexWriter(destDir, conf)){
                        for (int i = 0; i < numThreads; i++) {
                                String[] cridRange;
                                if (i == numThreads - 1) {
                                        cridRange = Arrays.copyOfRange(crida, 
i*batchSize, ((i+1)*batchSize - 1) + remainder);
                                } else {
                                        cridRange = Arrays.copyOfRange(crida, 
i*batchSize, ((i+1)*batchSize - 1));
                                }
                                String id = "" + ((char)('A' + i));
                                Runnable indexer = new IndexRunnable(id , 
reader, writer, new HashSet<String>(Arrays.asList(cridRange)));
                                executor.execute(indexer);
                        }
                        executor.shutdown();
                        executor.awaitTermination(2, TimeUnit.HOURS);
                } catch (Exception e) {
                        executor.shutdownNow();
                        throw new RuntimeException(e);
                } finally {
                        reader.close();
                        srcDir.close();
                        destDir.close();
                }
                
                System.out.println("Done!");
        }

        public static class IndexRunnable implements Runnable {
                private String id;
                private IndexReader reader;
                private IndexWriter writer;
                private Set<String> crids;

                public IndexRunnable(String id, IndexReader reader, IndexWriter 
writer, Set<String> crids) {
                        this.id = id;
                        this.reader = reader;
                        this.writer = writer;
                        this.crids = crids;
                }
                
                @Override
                public void run() {
                        IndexSearcher searcher = new IndexSearcher(reader);

                        try {
                                int count = 0;
                                for (String crid : crids) {
                                        List<Document> docs = new ArrayList<>();
                                        
                                        BooleanQuery abidQuery = new 
BooleanQuery();
                                        abidQuery.add(new TermQuery(new 
Term("ABID", crid)), Occur.MUST);
                                        abidQuery.add(new TermQuery(new 
Term("AGPR", "true")), Occur.MUST);
                                        
                                        TermQuery cridQuery = new TermQuery(new 
Term("CRID", crid));
                                        
                                        TopDocs creatorDocs = 
searcher.search(abidQuery, Integer.MAX_VALUE);
                                        TopDocs workDocs = 
searcher.search(cridQuery, Integer.MAX_VALUE);
                                        
                                        for (int i = 0; i < 
workDocs.scoreDocs.length; i++) {
                                                
docs.add(reader.document(workDocs.scoreDocs[i].doc));
                                        }
                                        
                                        if (creatorDocs.scoreDocs.length > 0) {
                                                
docs.add(reader.document(creatorDocs.scoreDocs[0].doc));
                                        }
                                        
                                        writer.addDocuments(docs);
                                        if (++count % 100 == 0) {
                                                System.out.println(id + " = " + 
count);
                                                writer.commit();
                                        }
                                }
                        } catch (IOException e) {
                                throw new RuntimeException(e);
                        }
                }
        }

I then attempt to perform a block join query as follows:

        private void runToChildBlockJoinQuery(String indexPath) throws 
IOException {
                FSDirectory dir = FSDirectory.open(new File(indexPath));
                IndexReader reader = DirectoryReader.open(dir);
                IndexSearcher searcher = new IndexSearcher(reader);
                
                TermQuery parentQuery = new TermQuery(new Term("NT", 
"american"));
                TermQuery parentFilterQuery = new TermQuery(new Term("AGTY", 
"np"));
                Filter parentFilter = new CachingWrapperFilter(new 
QueryWrapperFilter(parentFilterQuery));
                
                ToChildBlockJoinQuery tcbjq = new 
ToChildBlockJoinQuery(parentQuery, parentFilter, true);
                
                TopDocs worksDocs = searcher.search(tcbjq, 20);
                
                displayWorks(reader, searcher, worksDocs);
        }

and I get the following exception:

Exception in thread "main" java.lang.IllegalStateException: parentFilter must 
return FixedBitSet; got org.apache.lucene.util.WAH8DocIdSet@34e671de
        at 
org.apache.lucene.search.join.ToChildBlockJoinQuery$ToChildBlockJoinWeight.scorer(ToChildBlockJoinQuery.java:148)
        at org.apache.lucene.search.Weight.bulkScorer(Weight.java:131)
        at org.apache.lucene.search.IndexSearcher.search(IndexSearcher.java:618)
        at org.apache.lucene.search.IndexSearcher.search(IndexSearcher.java:491)
        at org.apache.lucene.search.IndexSearcher.search(IndexSearcher.java:448)
        at org.apache.lucene.search.IndexSearcher.search(IndexSearcher.java:281)
        at org.apache.lucene.search.IndexSearcher.search(IndexSearcher.java:269)
        at 
BlockJoinQueryTester.runToChildBlockJoinQuery(BlockJoinQueryTester.java:73)
        at BlockJoinQueryTester.main(BlockJoinQueryTester.java:40)

I don't understand what I'm doing wrong and what a "FixedBitSet" is and why I 
don't get one out of my filter.  Is FixedBitSet a special kind of OpenBitSet 
and what does "fixed" mean in this context?  Thanks for any help.

Jim

---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org
For additional commands, e-mail: java-user-h...@lucene.apache.org

ToChildBlockJoinQuery question

Reply via email to