I still haven't found the cause of the NPE my post filter triggers when it runs
against a sharded collection, so I'm posting my code in the hope that a
seasoned Solr pro might spot something. I thought the problem might be that I
wasn't treating the doc values as multi doc values when the index has multiple
segments, but I optimized my test collection down to a single segment and the
search still fails in the same spot.
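
For context, this is roughly the distinction I was trying to rule out (just a
sketch, not my actual factory code; "myField", searcher, contexts and globalDoc
are placeholders): top-level "multi" doc values accept index-wide doc IDs,
while per-segment doc values expect segment-relative IDs.

        // Sketch only -- not my real code; null checks omitted.
        SortedDocValues topLevel =
                MultiDocValues.getSortedValues(searcher.getIndexReader(), "myField");
        int ordFromGlobalId = topLevel.getOrd(globalDoc);

        // Per-segment lookup: subtract the leaf's docBase first.
        LeafReaderContext leaf = contexts[currentContext];
        SortedDocValues perLeaf = leaf.reader().getSortedDocValues("myField");
        int ordFromSegmentId = perLeaf.getOrd(globalDoc - leaf.docBase);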

ERROR - 2016-08-10 09:03:20.249; [ShardTest1 shard1_0 core_node3 ShardTest1_shard1_0_replica1] org.apache.solr.common.SolrException; null:java.lang.NullPointerException
        at org.apache.solr.handler.component.QueryComponent.returnFields(QueryComponent.java:1305)
        at org.apache.solr.handler.component.QueryComponent.handleRegularResponses(QueryComponent.java:758)
        at org.apache.solr.handler.component.QueryComponent.handleResponses(QueryComponent.java:729)
        at org.apache.solr.handler.component.SearchHandler.handleRequestBody(SearchHandler.java:388)


import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.DelegatingCollector;
// imports for our own classes (SearchPreProcessor, AggregationStats,
// AggregationStatsArray, ImmutableSparseArray) omitted

public class DocumentCollapsingCollector extends DelegatingCollector {
        static final String AGGR_STATS = "AggregationStats";
        static final String SORT_BY_SCORE = "SortByScore";
        private static final String TOTAL_DOCS_STAT = "totalDocCount";
        private final SolrQueryRequest req;
        private final ResponseBuilder rb;
        private final LeafReaderContext[] contexts;
        private final FixedBitSet collapsedSet;
        private final List<SortedDocValues> fieldValues;
        private final NumericDocValues spendValues;
        private final Map<FieldOrdinals, AggregationStats> aggregatedDocs;
        private int docBase;
        private final int maxDoc;
        private final int numberOfFields;
        private int totalDocs;
        private final SearchPreProcessor.SortBy sortBy;

        DocumentCollapsingCollector(int maxDoc, int segments,
                        List<SortedDocValues> docValues, NumericDocValues spendValues,
                        SolrQueryRequest req, ResponseBuilder rb) {

                aggregatedDocs = new HashMap<>();
                this.maxDoc = maxDoc;
                contexts = new LeafReaderContext[segments];
                collapsedSet = new FixedBitSet(maxDoc);
                fieldValues = docValues;
                numberOfFields = docValues.size();
                this.spendValues = spendValues;
                this.req = req;
                this.rb = rb;
                sortBy = (SearchPreProcessor.SortBy) req.getContext().get(SORT_BY_SCORE);
        }

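        // Bucket each matching doc by its tuple of field ordinals: the first doc
        // seen for a tuple is kept as the representative; later docs just bump the
        // count and add their spend.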
        @Override
        public void collect(int doc) throws IOException {
                int globalDoc = doc + docBase;
                int[] ords = new int[numberOfFields];

                int i=0;
                for (SortedDocValues vals : fieldValues) {
                        ords[i++] = vals.getOrd(globalDoc);
                }

                FieldOrdinals ordinals = new FieldOrdinals(ords);
                AggregationStats stats = aggregatedDocs.get(ordinals);
                if (stats != null) {
                        stats.bumpCount();
                        stats.addSpend(Double.longBitsToDouble(spendValues.get(globalDoc)));
                } else {
                        aggregatedDocs.put(ordinals, new AggregationStats(globalDoc,
                                        Double.longBitsToDouble(spendValues.get(globalDoc))));
                }
                totalDocs++;
        }

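        // Scores are only needed when the results will be sorted by an aggregate
        // (sortBy is non-null); the dummy scorer below supplies that value.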
        @Override
        public boolean needsScores() {
                return sortBy != null;
        }

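        // Remember every leaf context so finish() can replay the collapsed docs
        // segment by segment, and track the current docBase so collect() can
        // compute global doc IDs.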
        @Override
        protected void doSetNextReader(LeafReaderContext context) throws IOException {
                contexts[context.ord] = context;
                docBase = context.docBase;
        }

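        // After all segments have been collected: mark one representative doc per
        // bucket, expose the aggregates to the doc transformer via the request
        // context, then push the surviving docs down to the delegate collector.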
        @Override
        public void finish() throws IOException {
                if (contexts.length == 0) {
                        return;
                }

                for (AggregationStats docStats : aggregatedDocs.values()) {
                        collapsedSet.set(docStats.getDocId());
                }

                // saving the stats to the request context so that a doc transformer can pick them up
                AggregationStatsArray stats = new AggregationStatsArray(aggregatedDocs.values());
                ImmutableSparseArray<AggregationStats> statsArray =
                                new ImmutableSparseArray<AggregationStats>(stats);
                req.getContext().put(AGGR_STATS, statsArray);

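                // Walk the collapsed bitset in doc-id order, switching leaf contexts as
                // we cross segment boundaries and feeding the aggregate as the score.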
                int currentContext = 0;
                int currentDocBase = 0;
                int nextDocBase = currentContext+1 < contexts.length ?
                                contexts[currentContext+1].docBase : maxDoc;

                super.leafDelegate = super.delegate.getLeafCollector(contexts[currentContext]);
                DummyScorer dummy = new DummyScorer();
                super.leafDelegate.setScorer(dummy);

                BitSetIterator it = new BitSetIterator(collapsedSet, 0L);
                int docId = -1;

                while ((docId = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                        if (SearchPreProcessor.SortBy.COUNT.equals(sortBy)) {
                                dummy.score = statsArray.get(docId).getCount();
                        } else if (SearchPreProcessor.SortBy.SPEND.equals(sortBy)) {
                                dummy.score = (float) statsArray.get(docId).getSpend();
                        }

                        while (docId >= nextDocBase) {
                                currentContext++;
                                currentDocBase = contexts[currentContext].docBase;
                                nextDocBase = currentContext+1 < contexts.length ?
                                                contexts[currentContext+1].docBase : maxDoc;

                                super.leafDelegate = super.delegate.getLeafCollector(contexts[currentContext]);
                                super.leafDelegate.setScorer(dummy);
                        }

                        int contextDoc = docId-currentDocBase;
                        dummy.docId = contextDoc;
                        super.leafDelegate.collect(contextDoc);
                }

                rb.rsp.add(TOTAL_DOCS_STAT, Integer.valueOf(totalDocs));

                if (super.delegate instanceof DelegatingCollector) {
                        ((DelegatingCollector) super.delegate).finish();
                }
        }

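        // Hash-map key wrapping the per-field ordinals of a document.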
        private class FieldOrdinals {
                private final int[] ords;

                FieldOrdinals(int[] ords) {
                        this.ords = ords;
                }

                int[] getOrds() {
                        return ords;
                }

                @Override
                public int hashCode() {
                        return Arrays.hashCode(ords);
                }

                @Override
                public boolean equals(Object obj) {
                        return Arrays.equals(ords, ((FieldOrdinals) obj).getOrds());
                }
        }

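        // Minimal scorer whose score is set to the bucket's aggregate (count or
        // spend) before each collect() call on the delegate.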
        private class DummyScorer extends Scorer {
                float score;
                int docId;

                DummyScorer() {
                        super(null);
                }

                @Override
                public float score() throws IOException {
                        return score;
                }

                @Override
                public int freq() throws IOException {
                        return 0;
                }

                @Override
                public int advance(int i) throws IOException {
                        return -1;
                }

                @Override
                public long cost() {
                        return 0;
                }

                @Override
                public int docID() {
                        return docId;
                }

                @Override
                public int nextDoc() throws IOException {
                        return 0;
                }
        }
}


