This is an automated email from the ASF dual-hosted git repository.

Jackie-Jiang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 4660ed5700e Avoid materializing the full bitmap for count aggregation 
with filters for SV columns (#18543)
4660ed5700e is described below

commit 4660ed5700e7138ec1ee6ae941396d5b5cbb8d8a
Author: Siddharth Teotia <[email protected]>
AuthorDate: Sun Jun 21 19:08:36 2026 -0700

    Avoid materializing the full bitmap for count aggregation with filters for 
SV columns (#18543)
---
 .../filter/InvertedIndexFilterOperator.java        |  52 +++--
 .../filter/InvertedIndexFilterOperatorTest.java    | 132 ++++++++++++
 .../BenchmarkInvertedIndexGetNumMatchingDocs.java  | 239 +++++++++++++++++++++
 3 files changed, 403 insertions(+), 20 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/InvertedIndexFilterOperator.java
 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/InvertedIndexFilterOperator.java
index a82ba160f16..97acb3b326e 100644
--- 
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/InvertedIndexFilterOperator.java
+++ 
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/InvertedIndexFilterOperator.java
@@ -44,6 +44,7 @@ public class InvertedIndexFilterOperator extends 
BaseColumnFilterOperator {
   private final PredicateEvaluator _predicateEvaluator;
   private final InvertedIndexReader<ImmutableRoaringBitmap> 
_invertedIndexReader;
   private final boolean _exclusive;
+  private final boolean _isSingleValue;
 
   InvertedIndexFilterOperator(QueryContext queryContext, PredicateEvaluator 
predicateEvaluator, DataSource dataSource,
       int numDocs) {
@@ -54,6 +55,7 @@ public class InvertedIndexFilterOperator extends 
BaseColumnFilterOperator {
         (InvertedIndexReader<ImmutableRoaringBitmap>) 
dataSource.getInvertedIndex();
     _invertedIndexReader = invertedIndexReader;
     _exclusive = predicateEvaluator.isExclusive();
+    _isSingleValue = dataSource.getDataSourceMetadata().isSingleValue();
   }
 
   @Override
@@ -102,28 +104,38 @@ public class InvertedIndexFilterOperator extends 
BaseColumnFilterOperator {
 
   @Override
   public int getNumMatchingDocs() {
-    int count = 0;
     int[] dictIds = _exclusive ? _predicateEvaluator.getNonMatchingDictIds() : 
_predicateEvaluator.getMatchingDictIds();
-    switch (dictIds.length) {
-      case 0:
-        break;
-      case 1: {
-        count = _invertedIndexReader.getDocIds(dictIds[0]).getCardinality();
-        break;
+    int count;
+    if (_isSingleValue) {
+      // On a single-value column, per-dictId bitmaps partition the docId 
space (each docId has exactly one
+      // dictId), so the union cardinality equals the sum of per-bitmap 
cardinalities. No scratch bitmap is
+      // allocated and no OR pass is performed.
+      count = 0;
+      for (int dictId : dictIds) {
+        count += _invertedIndexReader.getDocIds(dictId).getCardinality();
       }
-      case 2: {
-        count = 
ImmutableRoaringBitmap.orCardinality(_invertedIndexReader.getDocIds(dictIds[0]),
-            _invertedIndexReader.getDocIds(dictIds[1]));
-        break;
-      }
-      default: {
-        // this could be optimised if the bitmaps are known to be disjoint (as 
in a single value bitmap index)
-        MutableRoaringBitmap bitmap = new MutableRoaringBitmap();
-        for (int dictId : dictIds) {
-          bitmap.or(_invertedIndexReader.getDocIds(dictId));
-        }
-        count = bitmap.getCardinality();
-        break;
+    } else {
+      // TODO: For MV column, per-dictId bitmaps may overlap, so we must 
materialize the union to count.
+      // A streaming union-cardinality variant was benchmarked but not 
implemented as a few combinations of
+      // cardinality and number of matching dictIds shows regressions.
+      count = 0;
+      switch (dictIds.length) {
+        case 0:
+          break;
+        case 1:
+          count = _invertedIndexReader.getDocIds(dictIds[0]).getCardinality();
+          break;
+        case 2:
+          count = 
ImmutableRoaringBitmap.orCardinality(_invertedIndexReader.getDocIds(dictIds[0]),
+              _invertedIndexReader.getDocIds(dictIds[1]));
+          break;
+        default:
+          MutableRoaringBitmap bitmap = new MutableRoaringBitmap();
+          for (int dictId : dictIds) {
+            bitmap.or(_invertedIndexReader.getDocIds(dictId));
+          }
+          count = bitmap.getCardinality();
+          break;
       }
     }
     return _exclusive ? _numDocs - count : count;
diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/InvertedIndexFilterOperatorTest.java
 
b/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/InvertedIndexFilterOperatorTest.java
new file mode 100644
index 00000000000..b90095ff7d5
--- /dev/null
+++ 
b/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/InvertedIndexFilterOperatorTest.java
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.operator.filter;
+
+import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator;
+import org.apache.pinot.core.query.request.context.QueryContext;
+import org.apache.pinot.segment.spi.datasource.DataSource;
+import org.apache.pinot.segment.spi.datasource.DataSourceMetadata;
+import org.apache.pinot.segment.spi.index.reader.InvertedIndexReader;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+import org.roaringbitmap.buffer.MutableRoaringBitmap;
+import org.testng.annotations.Test;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+import static org.testng.Assert.assertEquals;
+
+
+/**
+ * Targeted tests for {@link 
InvertedIndexFilterOperator#getNumMatchingDocs()}. Five tests cover every
+ * distinct code branch (SV loop, MV switch arms {0, 2, default}, and 
exclusive arithmetic)
+ */
+public class InvertedIndexFilterOperatorTest {
+  private static final int NUM_DOCS = 1000;
+
+  // SV path: union cardinality equals sum of per-bitmap cardinalities under 
the disjoint invariant
+  @Test
+  public void testSvSumCardinalities() {
+    ImmutableRoaringBitmap b0 = bitmap(0, 1, 2, 3);
+    ImmutableRoaringBitmap b1 = bitmap(4, 5, 6);
+    ImmutableRoaringBitmap b2 = bitmap(7, 8, 9, 10, 11);
+    InvertedIndexFilterOperator operator =
+        newOperator(true, false, new int[]{0, 1, 2}, new 
ImmutableRoaringBitmap[]{b0, b1, b2});
+    assertEquals(operator.getNumMatchingDocs(), 12);
+  }
+
+  // SV exclusive path: numDocs minus sum of non-matching cardinalities; also 
exercises getNonMatchingDictIds
+  @Test
+  public void testSvExclusive() {
+    ImmutableRoaringBitmap b0 = bitmap(0, 1, 2, 3);
+    ImmutableRoaringBitmap b1 = bitmap(4, 5, 6);
+    InvertedIndexFilterOperator operator =
+        newOperator(true, true, new int[]{0, 1}, new 
ImmutableRoaringBitmap[]{b0, b1});
+    assertEquals(operator.getNumMatchingDocs(), NUM_DOCS - 7);
+  }
+
+  // MV {@code case 2} arm: pairwise {@code 
ImmutableRoaringBitmap.orCardinality} with overlap
+  @Test
+  public void testMvCase2Pairwise() {
+    ImmutableRoaringBitmap b0 = bitmap(0, 1, 2, 3, 4);
+    ImmutableRoaringBitmap b1 = bitmap(3, 4, 5, 6);
+    InvertedIndexFilterOperator operator =
+        newOperator(false, false, new int[]{0, 1}, new 
ImmutableRoaringBitmap[]{b0, b1});
+    // Union {0,1,2,3,4,5,6} -> 7. Sum-of-cardinalities would give 9 (wrong 
for MV).
+    assertEquals(operator.getNumMatchingDocs(), 7);
+  }
+
+  // MV default arm: regression guard — must return UNION cardinality, never 
sum
+  @Test
+  public void testMvUnionWithOverlap() {
+    ImmutableRoaringBitmap b0 = bitmap(0, 1, 2, 3, 4);
+    ImmutableRoaringBitmap b1 = bitmap(3, 4, 5, 6);
+    ImmutableRoaringBitmap b2 = bitmap(5, 6, 7);
+    InvertedIndexFilterOperator operator =
+        newOperator(false, false, new int[]{0, 1, 2}, new 
ImmutableRoaringBitmap[]{b0, b1, b2});
+    // Union {0..7} -> 8. Sum-of-cardinalities would give 12.
+    assertEquals(operator.getNumMatchingDocs(), 8);
+  }
+
+  // Empty dictIds + exclusive: an easy-to-miss edge — must return numDocs, 
not 0. Also exercises MV case 0
+  @Test
+  public void testMvEmptyDictIdsExclusive() {
+    InvertedIndexFilterOperator operator =
+        newOperator(false, true, new int[0], new ImmutableRoaringBitmap[0]);
+    assertEquals(operator.getNumMatchingDocs(), NUM_DOCS);
+  }
+
+  // ----- helpers -----
+
+  private static ImmutableRoaringBitmap bitmap(int... docIds) {
+    MutableRoaringBitmap bitmap = new MutableRoaringBitmap();
+    for (int docId : docIds) {
+      bitmap.add(docId);
+    }
+    return bitmap;
+  }
+
+  @SuppressWarnings({"unchecked", "rawtypes"})
+  private static InvertedIndexFilterOperator newOperator(boolean singleValue, 
boolean exclusive, int[] dictIds,
+      ImmutableRoaringBitmap[] perDictIdBitmaps) {
+    QueryContext queryContext = mock(QueryContext.class);
+    when(queryContext.isNullHandlingEnabled()).thenReturn(false);
+
+    DataSourceMetadata metadata = mock(DataSourceMetadata.class);
+    when(metadata.isSingleValue()).thenReturn(singleValue);
+
+    InvertedIndexReader reader = mock(InvertedIndexReader.class);
+    for (int i = 0; i < dictIds.length; i++) {
+      when(reader.getDocIds(dictIds[i])).thenReturn(perDictIdBitmaps[i]);
+    }
+
+    DataSource dataSource = mock(DataSource.class);
+    when(dataSource.getDataSourceMetadata()).thenReturn(metadata);
+    when(dataSource.getInvertedIndex()).thenReturn(reader);
+
+    PredicateEvaluator predicateEvaluator = mock(PredicateEvaluator.class);
+    when(predicateEvaluator.isExclusive()).thenReturn(exclusive);
+    if (exclusive) {
+      when(predicateEvaluator.getNonMatchingDictIds()).thenReturn(dictIds);
+    } else {
+      when(predicateEvaluator.getMatchingDictIds()).thenReturn(dictIds);
+    }
+
+    return new InvertedIndexFilterOperator(queryContext, predicateEvaluator, 
dataSource, NUM_DOCS);
+  }
+}
diff --git 
a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkInvertedIndexGetNumMatchingDocs.java
 
b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkInvertedIndexGetNumMatchingDocs.java
new file mode 100644
index 00000000000..9c06c9e61af
--- /dev/null
+++ 
b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkInvertedIndexGetNumMatchingDocs.java
@@ -0,0 +1,239 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.perf;
+
+import java.util.Arrays;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+import org.roaringbitmap.buffer.BufferFastAggregation;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+import org.roaringbitmap.buffer.MutableRoaringBitmap;
+
+
+/**
+ * Isolates the work performed by {@code 
InvertedIndexFilterOperator.getNumMatchingDocs()}
+ * on the multi-dictId (numDictIds >= 3) path:
+ *
+ * <pre>
+ *   MutableRoaringBitmap bitmap = new MutableRoaringBitmap();
+ *   for (int dictId : dictIds) {
+ *     bitmap.or(_invertedIndexReader.getDocIds(dictId));
+ *   }
+ *   count = bitmap.getCardinality();
+ * </pre>
+ *
+ * <p>Three variants are compared on the same input:
+ * <ul>
+ *   <li>{@code currentMaterialize} — the baseline (what the operator did 
before this change, and what the
+ *       MV path still does). Allocates a fresh accumulator, ORs in every 
per-dictId bitmap, then takes the
+ *       cardinality of the materialized union.</li>
+ *   <li>{@code svSumCardinalities} — the implemented fix for single-value 
columns. Bitmaps are guaranteed
+ *       disjoint, so the union cardinality is the sum of per-bitmap 
cardinalities. No allocation, no OR
+ *       pass. Correctness is undefined for MV columns (would double-count 
overlapping docIds); we include
+ *       it in the MV runs purely for comparison.</li>
+ *   <li>{@code mvFastOrCardinality} — a prototyped (but not implemented) 
variant for multi-value columns.
+ *       {@link BufferFastAggregation#orCardinality} streams through 
containers and computes the union
+ *       cardinality without materializing the result. Wins inside K in [~256, 
~10K] but regresses
+ *       outside that window — see the commit message for the threshold 
rationale.</li>
+ * </ul>
+ *
+ * <p>Sweep parameters reflect realistic settings for {@code COUNT(*) WHERE 
col IN (...)}:
+ * <ul>
+ *   <li>{@code _numDocs} — segment size (5M ~ one realtime segment).</li>
+ *   <li>{@code _dictionaryCardinality} — distinct values in the column.</li>
+ *   <li>{@code _numDictIdsMatched} — width of the IN clause (the K in N-K 
OR).</li>
+ *   <li>{@code _multiValue} — SV columns produce disjoint bitmaps; MV columns 
produce overlapping bitmaps.</li>
+ * </ul>
+ *
+ * <p>Cells where numDictIdsMatched > cardinality are skipped via a null 
sentinel set in {@link #setup()}.
+ *
+ * <p>Run: {@code java -jar pinot-perf/target/benchmarks.jar 
BenchmarkInvertedIndexGetNumMatchingDocs}
+ */
+@State(Scope.Benchmark)
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.MICROSECONDS)
+@Fork(value = 1, warmups = 0)
+@Warmup(iterations = 2, time = 2)
+@Measurement(iterations = 3, time = 2)
+public class BenchmarkInvertedIndexGetNumMatchingDocs {
+
+  @Param({"5000000"})
+  int _numDocs;
+
+  @Param({"1024", "100000", "1000000"})
+  int _dictionaryCardinality;
+
+  @Param({"4", "16", "64", "256", "1000", "10000", "100000"})
+  int _numDictIdsMatched;
+
+  @Param({"false", "true"})
+  boolean _multiValue;
+
+  /** For MV columns, average number of dictIds per doc. Ignored for SV. */
+  @Param({"4"})
+  int _mvValuesPerDoc;
+
+  /** The K bitmaps the operator will OR together — exactly what
+   * {@code _invertedIndexReader.getDocIds(dictIds[i])} returns inside the 
operator.
+   * Null for invalid cells (K > cardinality). */
+  private ImmutableRoaringBitmap[] _bitmaps;
+
+  // Cache the full per-dictId index across @Param combinations within a 
single JVM (fork). JMH runs all
+  // combos sequentially per fork, so building the 5M-doc index once per 
(cardinality, multiValue) tuple
+  // turns ~42 rebuilds into 6 actual builds.
+  private static int _cachedNumDocs = -1;
+  private static int _cachedCardinality = -1;
+  private static boolean _cachedMultiValue;
+  private static int _cachedMvValuesPerDoc = -1;
+  private static MutableRoaringBitmap[] _cachedFull;
+  // Packed (cardinality << 32) | index, ascending. Top-K is the last K 
entries.
+  private static long[] _cachedSorted;
+
+  @Setup(Level.Trial)
+  public void setup() {
+    if (_numDictIdsMatched > _dictionaryCardinality) {
+      // K can't exceed dictionary cardinality; benchmark methods 
short-circuit on null.
+      _bitmaps = null;
+      return;
+    }
+    MutableRoaringBitmap[] full = getOrBuildFull();
+    long[] sorted = _cachedSorted;
+    _bitmaps = new ImmutableRoaringBitmap[_numDictIdsMatched];
+    int n = _dictionaryCardinality;
+    // Pick top-K (last K entries of the ascending sort) so the workload 
targets common values.
+    for (int i = 0; i < _numDictIdsMatched; i++) {
+      int idx = (int) (sorted[n - 1 - i] & 0xFFFFFFFFL);
+      _bitmaps[i] = full[idx];
+    }
+  }
+
+  private MutableRoaringBitmap[] getOrBuildFull() {
+    if (_cachedFull != null && _cachedNumDocs == _numDocs && 
_cachedCardinality == _dictionaryCardinality
+        && _cachedMultiValue == _multiValue && _cachedMvValuesPerDoc == 
_mvValuesPerDoc) {
+      return _cachedFull;
+    }
+    Random random = new Random(42);
+    MutableRoaringBitmap[] full = new 
MutableRoaringBitmap[_dictionaryCardinality];
+    for (int i = 0; i < _dictionaryCardinality; i++) {
+      full[i] = new MutableRoaringBitmap();
+    }
+    if (_multiValue) {
+      // MV: each doc gets _mvValuesPerDoc random dictIds. Bitmaps overlap.
+      for (int docId = 0; docId < _numDocs; docId++) {
+        for (int v = 0; v < _mvValuesPerDoc; v++) {
+          full[random.nextInt(_dictionaryCardinality)].add(docId);
+        }
+      }
+    } else {
+      // SV: each doc gets exactly one dictId. Bitmaps are disjoint.
+      for (int docId = 0; docId < _numDocs; docId++) {
+        full[random.nextInt(_dictionaryCardinality)].add(docId);
+      }
+    }
+    // Optimize containers (matches what offline segment loading produces).
+    for (MutableRoaringBitmap b : full) {
+      b.runOptimize();
+    }
+    // Sort by cardinality ascending; top-K = last K. Arrays.sort(long[]) is 
O(n log n) — critical at
+    // cardinality=1M where the original selection-sort approach would have 
been O(K * n) ~ 10^11 ops.
+    long[] sorted = new long[_dictionaryCardinality];
+    for (int i = 0; i < _dictionaryCardinality; i++) {
+      sorted[i] = ((long) full[i].getCardinality() << 32) | (i & 0xFFFFFFFFL);
+    }
+    Arrays.sort(sorted);
+
+    _cachedFull = full;
+    _cachedSorted = sorted;
+    _cachedNumDocs = _numDocs;
+    _cachedCardinality = _dictionaryCardinality;
+    _cachedMultiValue = _multiValue;
+    _cachedMvValuesPerDoc = _mvValuesPerDoc;
+    return full;
+  }
+
+  /**
+   * Baseline: what the operator did before this change, and what the MV path 
still does. Allocates a
+   * fresh accumulator, ORs in every per-dictId bitmap, then takes the 
cardinality of the materialized
+   * union.
+   */
+  @Benchmark
+  public int currentMaterialize() {
+    if (_bitmaps == null) {
+      return 0;
+    }
+    MutableRoaringBitmap bitmap = new MutableRoaringBitmap();
+    for (ImmutableRoaringBitmap b : _bitmaps) {
+      bitmap.or(b);
+    }
+    return bitmap.getCardinality();
+  }
+
+  /**
+   * Shipped fix for single-value columns. Bitmaps from a per-dictId inverted 
index on an SV column are
+   * disjoint, so |⋃ b_i| = Σ |b_i|. No allocation, no OR pass. Correctness is 
undefined for MV columns
+   * (would double-count overlapping docIds); included in the MV runs purely 
for comparison.
+   */
+  @Benchmark
+  public long svSumCardinalities() {
+    if (_bitmaps == null) {
+      return 0;
+    }
+    long count = 0;
+    for (ImmutableRoaringBitmap b : _bitmaps) {
+      count += b.getCardinality();
+    }
+    return count;
+  }
+
+  /**
+   * Prototyped (but not shipped) variant for multi-value columns. 
RoaringBitmap's streaming
+   * union-cardinality avoids materializing the merged bitmap. Wins inside K 
in [~256, ~10K] but regresses
+   * at low K (4-16) and at very high K (~100K) on high-cardinality columns 
where horizontal-OR's fixed
+   * scratch-buffer allocation and two-pass scan amortize poorly.
+   */
+  @Benchmark
+  public int mvFastOrCardinality() {
+    if (_bitmaps == null) {
+      return 0;
+    }
+    return BufferFastAggregation.orCardinality(_bitmaps);
+  }
+
+  public static void main(String[] args)
+      throws Exception {
+    new Runner(new OptionsBuilder()
+        
.include(BenchmarkInvertedIndexGetNumMatchingDocs.class.getSimpleName())
+        .build())
+        .run();
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to