This is an automated email from the ASF dual-hosted git repository.
Jackie-Jiang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 4660ed5700e Avoid materializing the full bitmap for count aggregation
with filters for SV columns (#18543)
4660ed5700e is described below
commit 4660ed5700e7138ec1ee6ae941396d5b5cbb8d8a
Author: Siddharth Teotia <[email protected]>
AuthorDate: Sun Jun 21 19:08:36 2026 -0700
Avoid materializing the full bitmap for count aggregation with filters for
SV columns (#18543)
---
.../filter/InvertedIndexFilterOperator.java | 52 +++--
.../filter/InvertedIndexFilterOperatorTest.java | 132 ++++++++++++
.../BenchmarkInvertedIndexGetNumMatchingDocs.java | 239 +++++++++++++++++++++
3 files changed, 403 insertions(+), 20 deletions(-)
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/InvertedIndexFilterOperator.java
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/InvertedIndexFilterOperator.java
index a82ba160f16..97acb3b326e 100644
---
a/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/InvertedIndexFilterOperator.java
+++
b/pinot-core/src/main/java/org/apache/pinot/core/operator/filter/InvertedIndexFilterOperator.java
@@ -44,6 +44,7 @@ public class InvertedIndexFilterOperator extends
BaseColumnFilterOperator {
private final PredicateEvaluator _predicateEvaluator;
private final InvertedIndexReader<ImmutableRoaringBitmap>
_invertedIndexReader;
private final boolean _exclusive;
+ private final boolean _isSingleValue;
InvertedIndexFilterOperator(QueryContext queryContext, PredicateEvaluator
predicateEvaluator, DataSource dataSource,
int numDocs) {
@@ -54,6 +55,7 @@ public class InvertedIndexFilterOperator extends
BaseColumnFilterOperator {
(InvertedIndexReader<ImmutableRoaringBitmap>)
dataSource.getInvertedIndex();
_invertedIndexReader = invertedIndexReader;
_exclusive = predicateEvaluator.isExclusive();
+ _isSingleValue = dataSource.getDataSourceMetadata().isSingleValue();
}
@Override
@@ -102,28 +104,38 @@ public class InvertedIndexFilterOperator extends
BaseColumnFilterOperator {
@Override
public int getNumMatchingDocs() {
- int count = 0;
int[] dictIds = _exclusive ? _predicateEvaluator.getNonMatchingDictIds() :
_predicateEvaluator.getMatchingDictIds();
- switch (dictIds.length) {
- case 0:
- break;
- case 1: {
- count = _invertedIndexReader.getDocIds(dictIds[0]).getCardinality();
- break;
+ int count;
+ if (_isSingleValue) {
+ // On a single-value column, per-dictId bitmaps partition the docId
space (each docId has exactly one
+ // dictId), so the union cardinality equals the sum of per-bitmap
cardinalities. No scratch bitmap is
+ // allocated and no OR pass is performed.
+ count = 0;
+ for (int dictId : dictIds) {
+ count += _invertedIndexReader.getDocIds(dictId).getCardinality();
}
- case 2: {
- count =
ImmutableRoaringBitmap.orCardinality(_invertedIndexReader.getDocIds(dictIds[0]),
- _invertedIndexReader.getDocIds(dictIds[1]));
- break;
- }
- default: {
- // this could be optimised if the bitmaps are known to be disjoint (as
in a single value bitmap index)
- MutableRoaringBitmap bitmap = new MutableRoaringBitmap();
- for (int dictId : dictIds) {
- bitmap.or(_invertedIndexReader.getDocIds(dictId));
- }
- count = bitmap.getCardinality();
- break;
+ } else {
+ // TODO: For MV column, per-dictId bitmaps may overlap, so we must
materialize the union to count.
+ // A streaming union-cardinality variant was benchmarked but not
implemented as a few combinations of
+ // cardinality and number of matching dictIds shows regressions.
+ count = 0;
+ switch (dictIds.length) {
+ case 0:
+ break;
+ case 1:
+ count = _invertedIndexReader.getDocIds(dictIds[0]).getCardinality();
+ break;
+ case 2:
+ count =
ImmutableRoaringBitmap.orCardinality(_invertedIndexReader.getDocIds(dictIds[0]),
+ _invertedIndexReader.getDocIds(dictIds[1]));
+ break;
+ default:
+ MutableRoaringBitmap bitmap = new MutableRoaringBitmap();
+ for (int dictId : dictIds) {
+ bitmap.or(_invertedIndexReader.getDocIds(dictId));
+ }
+ count = bitmap.getCardinality();
+ break;
}
}
return _exclusive ? _numDocs - count : count;
diff --git
a/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/InvertedIndexFilterOperatorTest.java
b/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/InvertedIndexFilterOperatorTest.java
new file mode 100644
index 00000000000..b90095ff7d5
--- /dev/null
+++
b/pinot-core/src/test/java/org/apache/pinot/core/operator/filter/InvertedIndexFilterOperatorTest.java
@@ -0,0 +1,132 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.operator.filter;
+
+import org.apache.pinot.core.operator.filter.predicate.PredicateEvaluator;
+import org.apache.pinot.core.query.request.context.QueryContext;
+import org.apache.pinot.segment.spi.datasource.DataSource;
+import org.apache.pinot.segment.spi.datasource.DataSourceMetadata;
+import org.apache.pinot.segment.spi.index.reader.InvertedIndexReader;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+import org.roaringbitmap.buffer.MutableRoaringBitmap;
+import org.testng.annotations.Test;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+import static org.testng.Assert.assertEquals;
+
+
+/**
+ * Targeted tests for {@link
InvertedIndexFilterOperator#getNumMatchingDocs()}. Five tests cover every
+ * distinct code branch (SV loop, MV switch arms {0, 2, default}, and
exclusive arithmetic)
+ */
+public class InvertedIndexFilterOperatorTest {
+ private static final int NUM_DOCS = 1000;
+
+ // SV path: union cardinality equals sum of per-bitmap cardinalities under
the disjoint invariant
+ @Test
+ public void testSvSumCardinalities() {
+ ImmutableRoaringBitmap b0 = bitmap(0, 1, 2, 3);
+ ImmutableRoaringBitmap b1 = bitmap(4, 5, 6);
+ ImmutableRoaringBitmap b2 = bitmap(7, 8, 9, 10, 11);
+ InvertedIndexFilterOperator operator =
+ newOperator(true, false, new int[]{0, 1, 2}, new
ImmutableRoaringBitmap[]{b0, b1, b2});
+ assertEquals(operator.getNumMatchingDocs(), 12);
+ }
+
+ // SV exclusive path: numDocs minus sum of non-matching cardinalities; also
exercises getNonMatchingDictIds
+ @Test
+ public void testSvExclusive() {
+ ImmutableRoaringBitmap b0 = bitmap(0, 1, 2, 3);
+ ImmutableRoaringBitmap b1 = bitmap(4, 5, 6);
+ InvertedIndexFilterOperator operator =
+ newOperator(true, true, new int[]{0, 1}, new
ImmutableRoaringBitmap[]{b0, b1});
+ assertEquals(operator.getNumMatchingDocs(), NUM_DOCS - 7);
+ }
+
+ // MV {@code case 2} arm: pairwise {@code
ImmutableRoaringBitmap.orCardinality} with overlap
+ @Test
+ public void testMvCase2Pairwise() {
+ ImmutableRoaringBitmap b0 = bitmap(0, 1, 2, 3, 4);
+ ImmutableRoaringBitmap b1 = bitmap(3, 4, 5, 6);
+ InvertedIndexFilterOperator operator =
+ newOperator(false, false, new int[]{0, 1}, new
ImmutableRoaringBitmap[]{b0, b1});
+ // Union {0,1,2,3,4,5,6} -> 7. Sum-of-cardinalities would give 9 (wrong
for MV).
+ assertEquals(operator.getNumMatchingDocs(), 7);
+ }
+
+ // MV default arm: regression guard — must return UNION cardinality, never
sum
+ @Test
+ public void testMvUnionWithOverlap() {
+ ImmutableRoaringBitmap b0 = bitmap(0, 1, 2, 3, 4);
+ ImmutableRoaringBitmap b1 = bitmap(3, 4, 5, 6);
+ ImmutableRoaringBitmap b2 = bitmap(5, 6, 7);
+ InvertedIndexFilterOperator operator =
+ newOperator(false, false, new int[]{0, 1, 2}, new
ImmutableRoaringBitmap[]{b0, b1, b2});
+ // Union {0..7} -> 8. Sum-of-cardinalities would give 12.
+ assertEquals(operator.getNumMatchingDocs(), 8);
+ }
+
+ // Empty dictIds + exclusive: an easy-to-miss edge — must return numDocs,
not 0. Also exercises MV case 0
+ @Test
+ public void testMvEmptyDictIdsExclusive() {
+ InvertedIndexFilterOperator operator =
+ newOperator(false, true, new int[0], new ImmutableRoaringBitmap[0]);
+ assertEquals(operator.getNumMatchingDocs(), NUM_DOCS);
+ }
+
+ // ----- helpers -----
+
+ private static ImmutableRoaringBitmap bitmap(int... docIds) {
+ MutableRoaringBitmap bitmap = new MutableRoaringBitmap();
+ for (int docId : docIds) {
+ bitmap.add(docId);
+ }
+ return bitmap;
+ }
+
+ @SuppressWarnings({"unchecked", "rawtypes"})
+ private static InvertedIndexFilterOperator newOperator(boolean singleValue,
boolean exclusive, int[] dictIds,
+ ImmutableRoaringBitmap[] perDictIdBitmaps) {
+ QueryContext queryContext = mock(QueryContext.class);
+ when(queryContext.isNullHandlingEnabled()).thenReturn(false);
+
+ DataSourceMetadata metadata = mock(DataSourceMetadata.class);
+ when(metadata.isSingleValue()).thenReturn(singleValue);
+
+ InvertedIndexReader reader = mock(InvertedIndexReader.class);
+ for (int i = 0; i < dictIds.length; i++) {
+ when(reader.getDocIds(dictIds[i])).thenReturn(perDictIdBitmaps[i]);
+ }
+
+ DataSource dataSource = mock(DataSource.class);
+ when(dataSource.getDataSourceMetadata()).thenReturn(metadata);
+ when(dataSource.getInvertedIndex()).thenReturn(reader);
+
+ PredicateEvaluator predicateEvaluator = mock(PredicateEvaluator.class);
+ when(predicateEvaluator.isExclusive()).thenReturn(exclusive);
+ if (exclusive) {
+ when(predicateEvaluator.getNonMatchingDictIds()).thenReturn(dictIds);
+ } else {
+ when(predicateEvaluator.getMatchingDictIds()).thenReturn(dictIds);
+ }
+
+ return new InvertedIndexFilterOperator(queryContext, predicateEvaluator,
dataSource, NUM_DOCS);
+ }
+}
diff --git
a/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkInvertedIndexGetNumMatchingDocs.java
b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkInvertedIndexGetNumMatchingDocs.java
new file mode 100644
index 00000000000..9c06c9e61af
--- /dev/null
+++
b/pinot-perf/src/main/java/org/apache/pinot/perf/BenchmarkInvertedIndexGetNumMatchingDocs.java
@@ -0,0 +1,239 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.perf;
+
+import java.util.Arrays;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.runner.Runner;
+import org.openjdk.jmh.runner.options.OptionsBuilder;
+import org.roaringbitmap.buffer.BufferFastAggregation;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+import org.roaringbitmap.buffer.MutableRoaringBitmap;
+
+
+/**
+ * Isolates the work performed by {@code
InvertedIndexFilterOperator.getNumMatchingDocs()}
+ * on the multi-dictId (numDictIds >= 3) path:
+ *
+ * <pre>
+ * MutableRoaringBitmap bitmap = new MutableRoaringBitmap();
+ * for (int dictId : dictIds) {
+ * bitmap.or(_invertedIndexReader.getDocIds(dictId));
+ * }
+ * count = bitmap.getCardinality();
+ * </pre>
+ *
+ * <p>Three variants are compared on the same input:
+ * <ul>
+ * <li>{@code currentMaterialize} — the baseline (what the operator did
before this change, and what the
+ * MV path still does). Allocates a fresh accumulator, ORs in every
per-dictId bitmap, then takes the
+ * cardinality of the materialized union.</li>
+ * <li>{@code svSumCardinalities} — the implemented fix for single-value
columns. Bitmaps are guaranteed
+ * disjoint, so the union cardinality is the sum of per-bitmap
cardinalities. No allocation, no OR
+ * pass. Correctness is undefined for MV columns (would double-count
overlapping docIds); we include
+ * it in the MV runs purely for comparison.</li>
+ * <li>{@code mvFastOrCardinality} — a prototyped (but not implemented)
variant for multi-value columns.
+ * {@link BufferFastAggregation#orCardinality} streams through
containers and computes the union
+ * cardinality without materializing the result. Wins inside K in [~256,
~10K] but regresses
+ * outside that window — see the commit message for the threshold
rationale.</li>
+ * </ul>
+ *
+ * <p>Sweep parameters reflect realistic settings for {@code COUNT(*) WHERE
col IN (...)}:
+ * <ul>
+ * <li>{@code _numDocs} — segment size (5M ~ one realtime segment).</li>
+ * <li>{@code _dictionaryCardinality} — distinct values in the column.</li>
+ * <li>{@code _numDictIdsMatched} — width of the IN clause (the K in N-K
OR).</li>
+ * <li>{@code _multiValue} — SV columns produce disjoint bitmaps; MV columns
produce overlapping bitmaps.</li>
+ * </ul>
+ *
+ * <p>Cells where numDictIdsMatched > cardinality are skipped via a null
sentinel set in {@link #setup()}.
+ *
+ * <p>Run: {@code java -jar pinot-perf/target/benchmarks.jar
BenchmarkInvertedIndexGetNumMatchingDocs}
+ */
+@State(Scope.Benchmark)
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.MICROSECONDS)
+@Fork(value = 1, warmups = 0)
+@Warmup(iterations = 2, time = 2)
+@Measurement(iterations = 3, time = 2)
+public class BenchmarkInvertedIndexGetNumMatchingDocs {
+
+ @Param({"5000000"})
+ int _numDocs;
+
+ @Param({"1024", "100000", "1000000"})
+ int _dictionaryCardinality;
+
+ @Param({"4", "16", "64", "256", "1000", "10000", "100000"})
+ int _numDictIdsMatched;
+
+ @Param({"false", "true"})
+ boolean _multiValue;
+
+ /** For MV columns, average number of dictIds per doc. Ignored for SV. */
+ @Param({"4"})
+ int _mvValuesPerDoc;
+
+ /** The K bitmaps the operator will OR together — exactly what
+ * {@code _invertedIndexReader.getDocIds(dictIds[i])} returns inside the
operator.
+ * Null for invalid cells (K > cardinality). */
+ private ImmutableRoaringBitmap[] _bitmaps;
+
+ // Cache the full per-dictId index across @Param combinations within a
single JVM (fork). JMH runs all
+ // combos sequentially per fork, so building the 5M-doc index once per
(cardinality, multiValue) tuple
+ // turns ~42 rebuilds into 6 actual builds.
+ private static int _cachedNumDocs = -1;
+ private static int _cachedCardinality = -1;
+ private static boolean _cachedMultiValue;
+ private static int _cachedMvValuesPerDoc = -1;
+ private static MutableRoaringBitmap[] _cachedFull;
+ // Packed (cardinality << 32) | index, ascending. Top-K is the last K
entries.
+ private static long[] _cachedSorted;
+
+ @Setup(Level.Trial)
+ public void setup() {
+ if (_numDictIdsMatched > _dictionaryCardinality) {
+ // K can't exceed dictionary cardinality; benchmark methods
short-circuit on null.
+ _bitmaps = null;
+ return;
+ }
+ MutableRoaringBitmap[] full = getOrBuildFull();
+ long[] sorted = _cachedSorted;
+ _bitmaps = new ImmutableRoaringBitmap[_numDictIdsMatched];
+ int n = _dictionaryCardinality;
+ // Pick top-K (last K entries of the ascending sort) so the workload
targets common values.
+ for (int i = 0; i < _numDictIdsMatched; i++) {
+ int idx = (int) (sorted[n - 1 - i] & 0xFFFFFFFFL);
+ _bitmaps[i] = full[idx];
+ }
+ }
+
+ private MutableRoaringBitmap[] getOrBuildFull() {
+ if (_cachedFull != null && _cachedNumDocs == _numDocs &&
_cachedCardinality == _dictionaryCardinality
+ && _cachedMultiValue == _multiValue && _cachedMvValuesPerDoc ==
_mvValuesPerDoc) {
+ return _cachedFull;
+ }
+ Random random = new Random(42);
+ MutableRoaringBitmap[] full = new
MutableRoaringBitmap[_dictionaryCardinality];
+ for (int i = 0; i < _dictionaryCardinality; i++) {
+ full[i] = new MutableRoaringBitmap();
+ }
+ if (_multiValue) {
+ // MV: each doc gets _mvValuesPerDoc random dictIds. Bitmaps overlap.
+ for (int docId = 0; docId < _numDocs; docId++) {
+ for (int v = 0; v < _mvValuesPerDoc; v++) {
+ full[random.nextInt(_dictionaryCardinality)].add(docId);
+ }
+ }
+ } else {
+ // SV: each doc gets exactly one dictId. Bitmaps are disjoint.
+ for (int docId = 0; docId < _numDocs; docId++) {
+ full[random.nextInt(_dictionaryCardinality)].add(docId);
+ }
+ }
+ // Optimize containers (matches what offline segment loading produces).
+ for (MutableRoaringBitmap b : full) {
+ b.runOptimize();
+ }
+ // Sort by cardinality ascending; top-K = last K. Arrays.sort(long[]) is
O(n log n) — critical at
+ // cardinality=1M where the original selection-sort approach would have
been O(K * n) ~ 10^11 ops.
+ long[] sorted = new long[_dictionaryCardinality];
+ for (int i = 0; i < _dictionaryCardinality; i++) {
+ sorted[i] = ((long) full[i].getCardinality() << 32) | (i & 0xFFFFFFFFL);
+ }
+ Arrays.sort(sorted);
+
+ _cachedFull = full;
+ _cachedSorted = sorted;
+ _cachedNumDocs = _numDocs;
+ _cachedCardinality = _dictionaryCardinality;
+ _cachedMultiValue = _multiValue;
+ _cachedMvValuesPerDoc = _mvValuesPerDoc;
+ return full;
+ }
+
+ /**
+ * Baseline: what the operator did before this change, and what the MV path
still does. Allocates a
+ * fresh accumulator, ORs in every per-dictId bitmap, then takes the
cardinality of the materialized
+ * union.
+ */
+ @Benchmark
+ public int currentMaterialize() {
+ if (_bitmaps == null) {
+ return 0;
+ }
+ MutableRoaringBitmap bitmap = new MutableRoaringBitmap();
+ for (ImmutableRoaringBitmap b : _bitmaps) {
+ bitmap.or(b);
+ }
+ return bitmap.getCardinality();
+ }
+
+ /**
+ * Shipped fix for single-value columns. Bitmaps from a per-dictId inverted
index on an SV column are
+ * disjoint, so |⋃ b_i| = Σ |b_i|. No allocation, no OR pass. Correctness is
undefined for MV columns
+ * (would double-count overlapping docIds); included in the MV runs purely
for comparison.
+ */
+ @Benchmark
+ public long svSumCardinalities() {
+ if (_bitmaps == null) {
+ return 0;
+ }
+ long count = 0;
+ for (ImmutableRoaringBitmap b : _bitmaps) {
+ count += b.getCardinality();
+ }
+ return count;
+ }
+
+ /**
+ * Prototyped (but not shipped) variant for multi-value columns.
RoaringBitmap's streaming
+ * union-cardinality avoids materializing the merged bitmap. Wins inside K
in [~256, ~10K] but regresses
+ * at low K (4-16) and at very high K (~100K) on high-cardinality columns
where horizontal-OR's fixed
+ * scratch-buffer allocation and two-pass scan amortize poorly.
+ */
+ @Benchmark
+ public int mvFastOrCardinality() {
+ if (_bitmaps == null) {
+ return 0;
+ }
+ return BufferFastAggregation.orCardinality(_bitmaps);
+ }
+
+ public static void main(String[] args)
+ throws Exception {
+ new Runner(new OptionsBuilder()
+
.include(BenchmarkInvertedIndexGetNumMatchingDocs.class.getSimpleName())
+ .build())
+ .run();
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]