Re: [PR] BooleanQuery: narrow bulk scoring when FILTER matches primary index sort [lucene]

via GitHub Tue, 12 May 2026 02:47:45 -0700


iverase commented on code in PR #15991:
URL: https://github.com/apache/lucene/pull/15991#discussion_r3225331964



##########
lucene/core/src/test/org/apache/lucene/search/BasePrimarySortFilterTestCase.java:
##########
@@ -0,0 +1,413 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.search;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.NoMergePolicy;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.tests.analysis.MockAnalyzer;
+import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.tests.util.TestUtil;
+import org.junit.Assert;
+
+/**
+ * Base test case for {@link PrimarySortAlignable} filter queries used with 
{@link
+ * FilteredOnPrimaryIndexSortFieldQuery}. Subclasses supply the index schema, 
filter query, and
+ * expected hit counts; common tests verify rewrite, hit count, multi-segment 
correctness vs a
+ * simpler boolean shape, two FILTER clauses, MUST_NOT interaction, and (when 
{@link
+ * #densePrimarySortBulkChecksOrNull()} is non-null) bulk-scorer narrowing, 
cost, and recorded
+ * scoring windows.
+ *
+ * <p>Concrete implementations exist for each supported filter type (numeric 
range, point range,
+ * sorted-set range, term query, etc.). Shared helpers live in {@link 
RecordingMatchAllQuery}.
+ */
+public abstract class BasePrimarySortFilterTestCase extends LuceneTestCase {
+
+  /** Number of documents indexed; randomized per test run in {@link 
#setUp()}. */
+  protected int numDocs;
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    numDocs = TestUtil.nextInt(random(), 50, 200);
+  }
+
+  /**
+   * When the index maps doc {@code n} to the {@code n}-th sort key in order 
(so the filter's dense
+   * doc-id interval is known), return the interval and expected stats for 
bulk-scorer checks.
+   *
+   * <p>{@link #testBulkScorerNarrowingCostAndRecording} assumes a 
force-merged single segment.
+   */
+  protected record DensePrimarySortBulkChecks(
+      int denseMinDocInclusive, int denseMaxDocExclusive, int 
expectedMatchingDocs) {}
+
+  /** Returns {@code null} if bulk-scorer slice tests do not apply to this 
fixture. */
+  protected DensePrimarySortBulkChecks densePrimarySortBulkChecksOrNull() {
+    return null;
+  }
+
+  /** Returns the index sort for the primary sort field under test. */
+  protected abstract Sort buildIndexSort();
+
+  /** Adds a single document at logical position {@code i} (0-based) to the 
writer. */
+  protected abstract void addDocument(IndexWriter writer, int i) throws 
IOException;
+
+  /**
+   * Returns the FILTER query that selects a contiguous sub-range of the 
sorted index. The query
+   * must implement {@link PrimarySortAlignable}.
+   */
+  protected abstract Query buildFilterQuery();
+
+  /** Expected number of documents matched by {@link #buildFilterQuery()}. */
+  protected abstract int expectedFilteredHitCount();
+
+  /**
+   * If the filter type supports a second, wider range for the two-filter 
intersection test, return
+   * it here. Return {@code null} to skip {@link #testTwoOptimizableFilters()}.
+   */
+  protected Query buildWiderFilterQuery() {
+    return null;
+  }
+
+  // ---- index helpers ----
+
+  private IndexWriterConfig buildIndexWriterConfig() {
+    IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
+    iwc.setIndexSort(buildIndexSort());
+    iwc.setMergePolicy(newMergePolicy());
+    return iwc;
+  }
+
+  // ---- tests ----
+
+  /** The filter query must implement {@link PrimarySortAlignable}. */
+  public void testFilterImplementsPrimarySortAlignable() {
+    Query filter = buildFilterQuery();
+    assertTrue(
+        filter.getClass().getSimpleName() + " must implement 
PrimarySortAlignable",
+        filter instanceof PrimarySortAlignable);
+  }
+
+  /**
+   * BooleanQuery with the filter as FILTER rewrites to {@link 
FilteredOnPrimaryIndexSortFieldQuery}
+   * (possibly wrapped, e.g. in {@link ConstantScoreQuery}).
+   *
+   * <p>Uses {@link RecordingMatchAllQuery} as the MUST clause: {@link 
MatchAllDocsQuery} + FILTER
+   * is rewritten to {@link ConstantScoreQuery} earlier in {@link 
BooleanQuery#rewrite}, which
+   * prevents the primary-sort optimization from applying at the query shape 
we need to assert here.
+   */
+  public void testRewriteProducesFilteredQuery() throws IOException {
+    try (Directory dir = newDirectory()) {
+      buildAndPopulateIndex(dir, true);
+      try (DirectoryReader reader = DirectoryReader.open(dir)) {
+        IndexSearcher searcher = newSearcher(reader);
+        Query query = buildBooleanQuery(new RecordingMatchAllQuery(), 
buildFilterQuery());
+        Query rewritten = searcher.rewrite(query);
+        assertNotNull(
+            "expected FilteredOnPrimaryIndexSortFieldQuery in rewrite chain",
+            unwrapFilteredOnPrimaryIndexSortFieldQuery(rewritten));
+      }
+    }
+  }
+
+  /** Filtered search returns exactly the expected number of hits. */
+  public void testFilteredHitCount() throws IOException {

Review Comment:
   I was playing with. the tests a bit an notice this is not testing this PR as 
the boolean query doesn't get rewritten into a 
`FilteredOnPrimaryIndexSortFieldQuery` because the MatchAllDocsQuery gets 
removed. 
   
   Can we make sure we are actually rewriting the query into a 
`FilteredOnPrimaryIndexSortFieldQuery` in all the tests that we are expecting 
that?



##########
lucene/core/src/java/org/apache/lucene/search/IndexSortSortedNumericDocValuesRangeQuery.java:
##########
@@ -544,6 +545,44 @@ private IteratorAndCount 
getDocIdSetIteratorOrNull(LeafReaderContext context) th
     return null;
   }
 
+  DocIdRange getDenseDocIdRangeForPrimarySort(LeafReaderContext context) 
throws IOException {
+    IteratorAndCount itAndCount = getDocIdSetIteratorOrNull(context);
+    if (itAndCount == null || itAndCount.count() < 0) {
+      return null;
+    }
+    return new DocIdRange(itAndCount.minDoc(), itAndCount.maxDoc());
+  }
+
+  @Override
+  public DocIdRange denseDocIdRangeOrNull(LeafReaderContext context) throws 
IOException {
+    return getDenseDocIdRangeForPrimarySort(context);
+  }
+
+  /**
+   * Dense doc-id span for the same primary-sort fast path as this query, for 
sorted-numeric bounds
+   * {@code field}, {@code lowerValue}, and {@code upperValue} (same semantics 
as this class's
+   * constructor). Used when the filter query is not an {@code
+   * IndexSortSortedNumericDocValuesRangeQuery} instance but matches those 
bounds (e.g. {@code
+   * SortedNumericDocValuesRangeQuery} or a 1D {@link PointRangeQuery}).
+   *
+   * <p>This allocates a short-lived query instance internally. It is called 
once per leaf at weight
+   * creation time, not on a hot scoring path.
+   *
+   * @param context the leaf reader context
+   * @param field the sorted-numeric field name (must be the primary index 
sort field)
+   * @param lowerValue inclusive lower bound
+   * @param upperValue inclusive upper bound
+   * @return the dense doc-id range, or {@code null} if a safe range cannot be 
determined
+   * @lucene.internal
+   */
+  public static DocIdRange denseDocIdRangeOrNullForSortedNumericBounds(

Review Comment:
   This method feels not to belong here, maybe move it to PrimarySortAlignables 
like you have done for terms?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: [PR] BooleanQuery: narrow bulk scoring when FILTER matches primary index sort [lucene]

Reply via email to