romseygeek commented on PR #16069:
URL: https://github.com/apache/lucene/pull/16069#issuecomment-4506112473
I played around a bit with this, and I think this might be a nicer way of
doing the BulkScorer change. Do you want to try applying it and re-running
benchmarks?
```
diff --git a/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java
index 81ebc6d0cfe..a879203814e 100644
--- a/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java
+++ b/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java
@@ -49,6 +49,7 @@ final class MaxScoreBulkScorer extends BulkScorer {
private final FixedBitSet windowMatches = new FixedBitSet(INNER_WINDOW_SIZE);
private final double[] windowScores = new double[INNER_WINDOW_SIZE];
+ private FixedBitSet filterMatches = null;
private final DocAndFloatFeatureBuffer docAndScoreBuffer = new DocAndFloatFeatureBuffer();
private final DocAndScoreAccBuffer docAndScoreAccBuffer;
@@ -70,6 +71,12 @@ final class MaxScoreBulkScorer extends BulkScorer {
maxScoreSums = new double[allScorers.length];
docAndScoreAccBuffer = new DocAndScoreAccBuffer();
docAndScoreAccBuffer.growNoCopy(INNER_WINDOW_SIZE);
+
+ if (this.filter != null && this.filter.twoPhaseView == null
+ && maxDoc >= INNER_WINDOW_SIZE
+ && this.filter.cost >= maxDoc / DenseConjunctionBulkScorer.DENSITY_THRESHOLD_INVERSE) {
+ this.filterMatches = new FixedBitSet(INNER_WINDOW_SIZE);
+ }
}
// Number of outer windows that have been evaluated
@@ -167,9 +174,6 @@ final class MaxScoreBulkScorer extends BulkScorer {
private void scoreInnerWindowWithFilter(
LeafCollector collector, Bits acceptDocs, int max, DisiWrapper filter) throws IOException {
- // TODO: Sometimes load the filter into a bitset and use the more optimized execution paths with
- // this bitset as `acceptDocs`
-
DisiWrapper top = essentialQueue.top();
assert top.doc < max;
while (top.doc < filter.doc) {
@@ -184,6 +188,29 @@ final class MaxScoreBulkScorer extends BulkScorer {
int innerWindowMax = MathUtil.unsignedMin(max, innerWindowMin + INNER_WINDOW_SIZE);
docAndScoreAccBuffer.size = 0;
+ if (filterMatches == null) {
+ fillScoreBufferViaLeapFrog(top, acceptDocs, innerWindowMax);
+ } else {
+ fillScoreBufferViaBitSet(top, acceptDocs, innerWindowMax);
+ }
+
+ scoreNonEssentialClauses(collector, docAndScoreAccBuffer, firstEssentialScorer);
+ }
+
+ private void fillScoreBufferViaBitSet(DisiWrapper top, Bits acceptDocs, int innerWindowMax) throws IOException {
+ filterMatches.clear();
+ int innerWindowMin = top.doc;
+ top.approximation.intoBitSet(innerWindowMax, filterMatches, innerWindowMin);
+ acceptDocs.applyMask(filterMatches, top.doc);
+
+ while (top.doc < innerWindowMax) {
+ int doc = top.doc;
+ boolean match = filterMatches.get(doc - innerWindowMin);
+ collectScores(top, doc, match);
+ }
+ }
+
+ private void fillScoreBufferViaLeapFrog(DisiWrapper top, Bits acceptDocs, int innerWindowMax) throws IOException {
while (top.doc < innerWindowMax) {
assert filter.doc <= top.doc; // invariant
if (filter.doc < top.doc) {
@@ -200,25 +227,27 @@ final class MaxScoreBulkScorer extends BulkScorer {
boolean match =
(acceptDocs == null || acceptDocs.get(doc))
&& (filter.twoPhaseView == null || filter.twoPhaseView.matches());
- double score = 0;
- do {
- if (match) {
- score += top.scorer.score();
- }
- top.doc = top.iterator.nextDoc();
- top = essentialQueue.updateTop();
- } while (top.doc == doc);
-
- if (match) {
- docAndScoreAccBuffer.grow(docAndScoreAccBuffer.size + 1);
- docAndScoreAccBuffer.docs[docAndScoreAccBuffer.size] = doc;
- docAndScoreAccBuffer.scores[docAndScoreAccBuffer.size] = score;
- docAndScoreAccBuffer.size++;
- }
+ collectScores(top, doc, match);
}
}
+ }
- scoreNonEssentialClauses(collector, docAndScoreAccBuffer, firstEssentialScorer);
+ private void collectScores(DisiWrapper top, int doc, boolean match) throws IOException {
+ double score = 0;
+ do {
+ if (match) {
+ score += top.scorer.score();
+ }
+ top.doc = top.iterator.nextDoc();
+ top = essentialQueue.updateTop();
+ } while (top.doc == doc);
+
+ if (match) {
+ docAndScoreAccBuffer.grow(docAndScoreAccBuffer.size + 1);
+ docAndScoreAccBuffer.docs[docAndScoreAccBuffer.size] = doc;
+ docAndScoreAccBuffer.scores[docAndScoreAccBuffer.size] = score;
+ docAndScoreAccBuffer.size++;
+ }
}
private void scoreInnerWindowSingleEssentialClause(
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]