msokolov commented on a change in pull request #1316: LUCENE-8929 parallel 
early termination in TopFieldCollector using minmin score
URL: https://github.com/apache/lucene-solr/pull/1316#discussion_r396134242
 
 

 ##########
 File path: 
lucene/core/src/java/org/apache/lucene/search/ParallelSortedCollector.java
 ##########
 @@ -0,0 +1,612 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.PriorityQueue;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.FieldValueHitQueue.Entry;
+import org.apache.lucene.search.TotalHits.Relation;
+
+/**
+ * A {@link Collector} for results sorted by field, optimized for early termination in
+ * the case where the {@link Sort} matches the index and the search is executed in parallel,
+ * using multiple threads.
+ *
+ * @lucene.experimental
+ */
+abstract class ParallelSortedCollector extends TopDocsCollector<Entry> {
+
+  private static final ScoreDoc[] EMPTY_SCOREDOCS = new ScoreDoc[0];
+
+  final int numHits;
+  final Sort sort;
+  final HitsThresholdChecker hitsThresholdChecker;
+  final FieldComparator<?> firstComparator;
+
+  // the current local minimum competitive score already propagated to the underlying scorer
+  float minCompetitiveScore;
+
+  // Enables global early termination with concurrent threads using minimum competitive scores and
+  // collected counts of all segments
+  final MaxScoreTerminator maxScoreTerminator;
+
+  final int numComparators;
+  FieldValueHitQueue.Entry bottom = null;
+  boolean queueFull;
+  int docBase;
+  final boolean needsScores;
+  final ScoreMode scoreMode;
+
+  // Declaring the constructor private prevents anyone else from extending this
+  // class. Note that the class cannot be final since it's extended by the
+  // internal versions. If a constructor with any other visibility were defined,
+  // anyone would be able to extend the class, which is not what we want.
+  private ParallelSortedCollector(FieldValueHitQueue<Entry> pq, int numHits, 
Sort sort,
+                                  HitsThresholdChecker hitsThresholdChecker, 
boolean needsScores,
+                                  MaxScoreTerminator maxScoreTerminator) {
+    super(pq);
+    this.needsScores = needsScores;
+    this.numHits = numHits;
+    this.sort = sort;
+    this.hitsThresholdChecker = hitsThresholdChecker;
+    this.maxScoreTerminator = maxScoreTerminator;
+    numComparators = pq.getComparators().length;
+    firstComparator = pq.getComparators()[0];
+    scoreMode = needsScores ? ScoreMode.COMPLETE : 
ScoreMode.COMPLETE_NO_SCORES;
+  }
+
+  private abstract class TopFieldLeafCollector implements LeafCollector {
+
+    final LeafFieldComparator comparator;
+    final int firstReverseMul;
+    final int reverseMul;
+    final LeafReaderContext context;
+    final MaxScoreTerminator.LeafState leafTerminationState;
+
+    private double score;
+    Scorable scorer;
+
+    TopFieldLeafCollector(FieldValueHitQueue<Entry> queue, LeafReaderContext 
context) throws IOException {
+      LeafFieldComparator[] comparators = queue.getComparators(context);
+      firstReverseMul = queue.reverseMul[0];
+      if (comparators.length == 1) {
+        this.reverseMul = queue.reverseMul[0];
+        this.comparator = comparators[0];
+      } else {
+        this.reverseMul = 1;
+        this.comparator = new MultiLeafFieldComparator(comparators, 
queue.reverseMul);
+      }
+      this.context = context;
+      leafTerminationState = maxScoreTerminator.addLeafState();
+    }
+
+    void countHit() {
+      ++totalHits;
+      // TODO: replace hitsThresholdChecker with something simpler
+      hitsThresholdChecker.incrementHitCount();
+    }
+
+    void collectHitIfCompetitive(int doc) throws IOException {
+      if (reverseMul * comparator.compareBottom(doc) > 0) {
+        comparator.copy(bottom.slot, doc);
+        score = getComparatorValue(bottom.slot);
+        //System.out.printf("leaf=%d doc=%d score=%f\n", context.ord, 
context.docBase + doc, score);
+        updateBottom(doc);
+        comparator.setBottom(bottom.slot);
+      } else {
+        // The comparator has no score from this document. We can use any 
noncompetitive value
+        // to induce this leaf to be terminated.
+        //System.out.printf("leaf=%d doc=%d (noncompetitive) score=%f\n", 
context.ord, context.docBase + doc, score);
+        score = getComparatorValue(bottom.slot) + 1;
+      }
+    }
+
+    void collectAnyHit(int doc, int hitsCollected) throws IOException {
+      // Startup transient: queue hasn't gathered numHits yet
+      int slot = hitsCollected - 1;
+      // Copy hit into queue
+      comparator.copy(slot, doc);
+      // compute the doc's score before it gets moved by updating the priority 
queue
+      score = getComparatorValue(slot);
+      add(slot, doc);
+      if (queueFull) {
+        comparator.setBottom(bottom.slot);
+      }
+    }
+
+    private double getComparatorValue(int slot) {
+      // We could avoid this cast by genericizing MaxScoreAccumulator and 
having its Number type
+      // co-vary with this FieldComparator; also - should DocComparator extend
+      // NumericComparator<Integer>?
+      return firstReverseMul * ((Number) 
firstComparator.value(slot)).doubleValue();
+    }
+
+    void updateTerminationState(int doc) {
+      leafTerminationState.update(score, context.docBase + doc);
+      if ((leafTerminationState.resultCount & maxScoreTerminator.intervalMask) 
== 0) {
+        //System.out.println("scoreboard update leaf=" + context.ord + " doc=" 
+ context.docBase + "+" + doc + " total=" + totalHits);
+        if (maxScoreTerminator.updateLeafState(leafTerminationState)) {
+          // Stop if across all segments we have collected enough, and our 
scores are no longer competitive
+          totalHitsRelation = Relation.GREATER_THAN_OR_EQUAL_TO;
+          //System.out.println("scoreboard terminate leaf " + context.ord + " 
doc=" + context.docBase + "+" + doc + " totalHits=" + totalHits + " score=" + 
(long) score);
+          throw new CollectionTerminatedException();
+        }
+      }
+    }
+
+    @Override
+    public void setScorer(Scorable scorer) throws IOException {
+      this.scorer = scorer;
+      comparator.setScorer(scorer);
+      minCompetitiveScore = 0f;
 
 Review comment:
   I don't understand. Do you mean checking a flag like `canEarlyTerminate`, as 
we have in TopFieldLeafCollector? This collector is only created when early 
termination is possible (see `isApplicable`).

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@lucene.apache.org
For additional commands, e-mail: issues-h...@lucene.apache.org

Reply via email to