This is an automated email from the ASF dual-hosted git repository.
yashmayya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 6d319a5715a Avoid cloning the first bitmap when intersecting
bitmap-based doc id sets in AndDocIdSet (#18912)
6d319a5715a is described below
commit 6d319a5715adbdf82251305de58347ebe2efa552
Author: Gonzalo Ortiz Jaureguizar <[email protected]>
AuthorDate: Thu Jul 2 20:58:27 2026 +0200
Avoid cloning the first bitmap when intersecting bitmap-based doc id sets
in AndDocIdSet (#18912)
---
.../pinot/core/operator/docidsets/AndDocIdSet.java | 9 +-
.../core/operator/docidsets/AndDocIdSetTest.java | 128 +++++++++++++++++++++
2 files changed, 135 insertions(+), 2 deletions(-)
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/AndDocIdSet.java
b/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/AndDocIdSet.java
index a2a7c3294d2..616b1a4542f 100644
---
a/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/AndDocIdSet.java
+++
b/pinot-core/src/main/java/org/apache/pinot/core/operator/docidsets/AndDocIdSet.java
@@ -157,8 +157,13 @@ public final class AndDocIdSet implements BlockDocIdSet {
if (numBitmapBasedDocIdIterators == 1) {
docIds = bitmapBasedDocIdIterators.get(0).getDocIds();
} else {
- MutableRoaringBitmap mutableDocIds =
bitmapBasedDocIdIterators.get(0).getDocIds().toMutableRoaringBitmap();
- for (int i = 1; i < numBitmapBasedDocIdIterators; i++) {
+ // NOTE: Intersect the two lowest-cardinality bitmaps (guaranteed by
the sort above) into a fresh result
+ // with the static and(), which allocates only the
intersection's containers, then intersect the
+ // remaining bitmaps into it in place (the intersection is
usually much smaller than any operand).
+ // Inputs are never mutated.
+ MutableRoaringBitmap mutableDocIds = ImmutableRoaringBitmap.and(
+ bitmapBasedDocIdIterators.get(0).getDocIds(),
bitmapBasedDocIdIterators.get(1).getDocIds());
+ for (int i = 2; i < numBitmapBasedDocIdIterators; i++) {
mutableDocIds.and(bitmapBasedDocIdIterators.get(i).getDocIds());
}
docIds = mutableDocIds;
diff --git
a/pinot-core/src/test/java/org/apache/pinot/core/operator/docidsets/AndDocIdSetTest.java
b/pinot-core/src/test/java/org/apache/pinot/core/operator/docidsets/AndDocIdSetTest.java
new file mode 100644
index 00000000000..1c6dc391fd9
--- /dev/null
+++
b/pinot-core/src/test/java/org/apache/pinot/core/operator/docidsets/AndDocIdSetTest.java
@@ -0,0 +1,128 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.operator.docidsets;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import org.apache.pinot.core.common.BlockDocIdIterator;
+import org.apache.pinot.core.common.BlockDocIdSet;
+import org.apache.pinot.segment.spi.Constants;
+import org.roaringbitmap.buffer.ImmutableRoaringBitmap;
+import org.roaringbitmap.buffer.MutableRoaringBitmap;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+
+
+/**
+ * Unit test for {@link AndDocIdSet}, focusing on the bitmap-only intersection
path.
+ */
+public class AndDocIdSetTest {
+ private static final long RANDOM_SEED = System.currentTimeMillis();
+ private static final Random RANDOM = new Random(RANDOM_SEED);
+ private static final String ERROR_MESSAGE = "Random seed: " + RANDOM_SEED;
+ private static final int NUM_DOCS = 100000;
+
+ @Test
+ public void testBitmapOnlyIntersection() {
+ // Cover both the 2-bitmap and the 3+-bitmap paths, mixing operand shapes:
dense (bitmap containers),
+ // sparse (array containers), run-optimized, and a serialized
buffer-backed ImmutableRoaringBitmap (the
+ // shape produced by memory-mapped index readers).
+ for (int numBitmaps = 2; numBitmaps <= 4; numBitmaps++) {
+ ImmutableRoaringBitmap[] bitmaps = new
ImmutableRoaringBitmap[numBitmaps];
+ MutableRoaringBitmap expected = null;
+ for (int i = 0; i < numBitmaps; i++) {
+ MutableRoaringBitmap bitmap = randomBitmap(i == 1 ? 0.001 : 0.2 + 0.2
* i);
+ switch (i % 4) {
+ case 1:
+ // Sparse bitmap left as-is (array containers)
+ bitmaps[i] = bitmap;
+ break;
+ case 2:
+ bitmap.runOptimize();
+ bitmaps[i] = bitmap;
+ break;
+ case 3:
+ bitmaps[i] = serialize(bitmap);
+ break;
+ default:
+ bitmaps[i] = bitmap;
+ break;
+ }
+ expected = expected == null ? bitmaps[i].toMutableRoaringBitmap()
+ : MutableRoaringBitmap.and(expected,
bitmaps[i].toMutableRoaringBitmap());
+ }
+ ImmutableRoaringBitmap[] copies = new ImmutableRoaringBitmap[numBitmaps];
+ List<BlockDocIdSet> docIdSets = new ArrayList<>(numBitmaps);
+ for (int i = 0; i < numBitmaps; i++) {
+ copies[i] = bitmaps[i].toMutableRoaringBitmap();
+ docIdSets.add(new BitmapDocIdSet(bitmaps[i], NUM_DOCS));
+ }
+
+ int[] actualDocIds = collectDocIds(new AndDocIdSet(docIdSets, null));
+
+ assertEquals(actualDocIds, expected.toArray(), ERROR_MESSAGE);
+ // The intersection must not mutate the input bitmaps (they may be
shared or backed by index buffers)
+ for (int i = 0; i < numBitmaps; i++) {
+ assertEquals(bitmaps[i].toMutableRoaringBitmap(), copies[i],
ERROR_MESSAGE);
+ }
+ }
+ }
+
+ @Test
+ public void testBitmapOnlyIntersectionEmptyResult() {
+ MutableRoaringBitmap first = MutableRoaringBitmap.bitmapOf(1, 3, 5);
+ MutableRoaringBitmap second = MutableRoaringBitmap.bitmapOf(0, 2, 4);
+ List<BlockDocIdSet> docIdSets =
+ List.of(new BitmapDocIdSet(first, NUM_DOCS), new
BitmapDocIdSet(second, NUM_DOCS));
+
+ int[] actualDocIds = collectDocIds(new AndDocIdSet(docIdSets, null));
+
+ assertEquals(actualDocIds.length, 0);
+ }
+
+ private static MutableRoaringBitmap randomBitmap(double density) {
+ MutableRoaringBitmap bitmap = new MutableRoaringBitmap();
+ for (int docId = 0; docId < NUM_DOCS; docId++) {
+ if (RANDOM.nextDouble() < density) {
+ bitmap.add(docId);
+ }
+ }
+ return bitmap;
+ }
+
+ private static ImmutableRoaringBitmap serialize(MutableRoaringBitmap bitmap)
{
+ ByteBuffer buffer = ByteBuffer.allocate(bitmap.serializedSizeInBytes());
+ bitmap.serialize(buffer);
+ buffer.flip();
+ return new ImmutableRoaringBitmap(buffer);
+ }
+
+ private static int[] collectDocIds(BlockDocIdSet docIdSet) {
+ BlockDocIdIterator iterator = docIdSet.iterator();
+ List<Integer> docIds = new ArrayList<>();
+ int docId;
+ while ((docId = iterator.next()) != Constants.EOF) {
+ docIds.add(docId);
+ }
+ return docIds.stream().mapToInt(Integer::intValue).toArray();
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]